From a98a4f2b845caed8cf98f40d99ed0ae614ba262e Mon Sep 17 00:00:00 2001 From: ina258 Date: Sat, 23 Mar 2019 14:17:19 +0100 Subject: [PATCH 01/12] simulator for bernoulli working --- batchglm/api/models/__init__.py | 1 + batchglm/api/models/glm_bern.py | 2 + batchglm/api/utils/random.py | 2 +- batchglm/models/glm_bern/__init__.py | 4 ++ batchglm/models/glm_bern/estimator.py | 30 ++++++++++ batchglm/models/glm_bern/external.py | 11 ++++ batchglm/models/glm_bern/model.py | 83 +++++++++++++++++++++++++++ batchglm/models/glm_bern/simulator.py | 47 +++++++++++++++ batchglm/models/glm_bern/utils.py | 73 +++++++++++++++++++++++ batchglm/models/glm_beta/utils.py | 1 - batchglm/utils/random.py | 25 ++++++++ 11 files changed, 277 insertions(+), 2 deletions(-) create mode 100644 batchglm/api/models/glm_bern.py create mode 100644 batchglm/models/glm_bern/__init__.py create mode 100644 batchglm/models/glm_bern/estimator.py create mode 100644 batchglm/models/glm_bern/external.py create mode 100644 batchglm/models/glm_bern/model.py create mode 100644 batchglm/models/glm_bern/simulator.py create mode 100644 batchglm/models/glm_bern/utils.py diff --git a/batchglm/api/models/__init__.py b/batchglm/api/models/__init__.py index 6521a82a..5a3142c9 100644 --- a/batchglm/api/models/__init__.py +++ b/batchglm/api/models/__init__.py @@ -1,3 +1,4 @@ from . import glm_nb from . import glm_norm from . import glm_beta +from . import glm_bern diff --git a/batchglm/api/models/glm_bern.py b/batchglm/api/models/glm_bern.py new file mode 100644 index 00000000..deba28e0 --- /dev/null +++ b/batchglm/api/models/glm_bern.py @@ -0,0 +1,2 @@ +from batchglm.models.glm_bern import InputData, Model, Simulator +#from batchglm.train.tf.glm_bern import Estimator \ No newline at end of file diff --git a/batchglm/api/utils/random.py b/batchglm/api/utils/random.py index 7a5993bb..fb674530 100644 --- a/batchglm/api/utils/random.py +++ b/batchglm/api/utils/random.py @@ -1 +1 @@ -from batchglm.utils.random import NegativeBinomial, Normal, Beta +from batchglm.utils.random import NegativeBinomial, Normal, Beta, Bernoulli diff --git a/batchglm/models/glm_bern/__init__.py b/batchglm/models/glm_bern/__init__.py new file mode 100644 index 00000000..efcf833d --- /dev/null +++ b/batchglm/models/glm_bern/__init__.py @@ -0,0 +1,4 @@ +from .model import Model, Model_XArray +from .external import InputData +from .simulator import Simulator +from .estimator import AbstractEstimator, EstimatorStoreXArray \ No newline at end of file diff --git a/batchglm/models/glm_bern/estimator.py b/batchglm/models/glm_bern/estimator.py new file mode 100644 index 00000000..26ff2fa6 --- /dev/null +++ b/batchglm/models/glm_bern/estimator.py @@ -0,0 +1,30 @@ +import abc + +from .model import Model, Model_XArray +from .external import _Estimator_GLM, _EstimatorStore_XArray_GLM, ESTIMATOR_PARAMS + + +class AbstractEstimator(Model, _Estimator_GLM, metaclass=abc.ABCMeta): + r""" + Estimator base class for generalized linear models (GLMs) with + bernoulli noise. + """ + + @classmethod + def param_shapes(cls) -> dict: + return ESTIMATOR_PARAMS + + +class EstimatorStoreXArray(_EstimatorStore_XArray_GLM, AbstractEstimator, Model_XArray): + + def __init__(self, estim: AbstractEstimator): + input_data = estim.input_data + # to_xarray triggers the get function of these properties and thereby + # causes evaluation of the properties that have not been computed during + # training, such as the hessian. 
+ params = estim.to_xarray( + ["a_var", "b_var", "loss", "log_likelihood", "gradients", "fisher_inv"], + coords=input_data.data + ) + + Model_XArray.__init__(self, input_data, params) \ No newline at end of file diff --git a/batchglm/models/glm_bern/external.py b/batchglm/models/glm_bern/external.py new file mode 100644 index 00000000..bb52b9f2 --- /dev/null +++ b/batchglm/models/glm_bern/external.py @@ -0,0 +1,11 @@ +from batchglm.models.base import SparseXArrayDataArray, SparseXArrayDataSet +from batchglm.models.base_glm import _Estimator_GLM, _EstimatorStore_XArray_GLM, ESTIMATOR_PARAMS +from batchglm.models.base_glm import InputData, INPUT_DATA_PARAMS +from batchglm.models.base_glm import _Model_GLM, _Model_XArray_GLM, MODEL_PARAMS, _model_from_params +from batchglm.models.base_glm import _Simulator_GLM +from batchglm.models.base_glm import closedform_glm_mean, closedform_glm_scale + +import batchglm.data as data_utils +import batchglm.utils.random as rand_utils +from batchglm.utils.numeric import weighted_mean, weighted_variance +from batchglm.utils.linalg import groupwise_solve_lm \ No newline at end of file diff --git a/batchglm/models/glm_bern/model.py b/batchglm/models/glm_bern/model.py new file mode 100644 index 00000000..bc35123e --- /dev/null +++ b/batchglm/models/glm_bern/model.py @@ -0,0 +1,83 @@ +import abc +try: + import anndata +except ImportError: + anndata = None +import xarray as xr +import numpy as np + +from .external import InputData +from .external import _Model_GLM, _Model_XArray_GLM, MODEL_PARAMS, _model_from_params + +# Define distribution parameters: +MODEL_PARAMS = MODEL_PARAMS.copy() +MODEL_PARAMS.update({ + "mu": ("observations", "features"), + "r": ("observations", "features"), +}) + +class Model(_Model_GLM, metaclass=abc.ABCMeta): + """ + Generalized Linear Model (GLM) with bernoulli noise. 
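+    Uses the logit function as the link for the location parameter (the Bernoulli success probability).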
+ """ + + @classmethod + def param_shapes(cls) -> dict: + return MODEL_PARAMS + + def link_loc(self, data): + return np.log(data/(1-data)) + + def inverse_link_loc(self, data): + return 1/(1+np.exp(-data)) + + def link_scale(self, data): + return np.log(data) + + def inverse_link_scale(self, data): + return np.exp(data) + + @property + def eta_loc(self) -> xr.DataArray: + # TODO: take this switch out once xr.dataset slicing yields dataarray with loc_names coordinate: + if isinstance(self.par_link_loc, xr.DataArray): + eta = self.design_loc.dot(self.par_link_loc, dims="design_loc_params") + else: + eta = np.matmul(self.design_loc.values, self.par_link_loc) + + if self.size_factors is not None: + assert False, "size factors not allowed" + return eta + + @property + def mu(self) -> xr.DataArray: + return self.location + + @property + def r(self) -> xr.DataArray: + return self.scale + + +def model_from_params(*args, **kwargs) -> Model: + (input_data, params) = _model_from_params(*args, **kwargs) + return Model_XArray(input_data, params) + + +class Model_XArray(_Model_XArray_GLM, Model): + _input_data: InputData + params: xr.Dataset + + def __init__(self, input_data: InputData, params: xr.Dataset): + super(_Model_XArray_GLM, self).__init__(input_data=input_data, params=params) + super(Model, self).__init__() + + def __str__(self): + return "[%s.%s object at %s]: data=%s" % ( + type(self).__module__, + type(self).__name__, + hex(id(self)), + self.params + ) + + def __repr__(self): + return self.__str__() diff --git a/batchglm/models/glm_bern/simulator.py b/batchglm/models/glm_bern/simulator.py new file mode 100644 index 00000000..c59c38a3 --- /dev/null +++ b/batchglm/models/glm_bern/simulator.py @@ -0,0 +1,47 @@ +import numpy as np + +from .model import Model +from .external import rand_utils, _Simulator_GLM + + +class Simulator(_Simulator_GLM, Model): + """ + Simulator for Generalized Linear Models (GLMs) with negative binomial noise. + Uses the natural logarithm as linker function. + """ + + def __init__( + self, + num_observations=1000, + num_features=100 + ): + Model.__init__(self) + _Simulator_GLM.__init__( + self, + num_observations=num_observations, + num_features=num_features + ) + + def generate_params( + self, + rand_fn_ave=lambda shape: np.random.uniform(0.4, 0.4, shape), + rand_fn=None, + rand_fn_loc=lambda shape: np.random.uniform(0.4, 0.6, shape), + rand_fn_scale=lambda shape: np.zeros(shape), + ): + self._generate_params( + self, + rand_fn_ave=rand_fn_ave, + rand_fn=rand_fn, + rand_fn_loc=rand_fn_loc, + rand_fn_scale=rand_fn_scale, + ) + + def generate_data(self): + """ + Sample random data based on bernoulli distribution and parameters. + """ + self.data["X"] = ( + self.param_shapes()["X"], + rand_utils.Bernoulli(mean=self.mu).sample() + ) diff --git a/batchglm/models/glm_bern/utils.py b/batchglm/models/glm_bern/utils.py new file mode 100644 index 00000000..5fee189c --- /dev/null +++ b/batchglm/models/glm_bern/utils.py @@ -0,0 +1,73 @@ +from typing import Union + +import numpy as np +import xarray as xr + +from .external import closedform_glm_mean, closedform_glm_scale +from .external import SparseXArrayDataArray + + +def closedform_bern_glm_logitmu( + X: Union[xr.DataArray, SparseXArrayDataArray], + design_loc, + constraints_loc, + size_factors=None, + link_fn=lambda data: np.log(data/(1-data)), + inv_link_fn=lambda data: 1/(1+np.exp(-data)) +): + r""" + Calculates a closed-form solution for the `mu` parameters of bernoulli GLMs. 
+ + :param X: The sample data + :param design_loc: design matrix for location + :param constraints_loc: tensor (all parameters x dependent parameters) + Tensor that encodes how complete parameter set which includes dependent + parameters arises from indepedent parameters: all = . + This form of constraints is used in vector generalized linear models (VGLMs). + :param size_factors: size factors for X + :return: tuple: (groupwise_means, mu, rmsd) + """ + return closedform_glm_mean( + X=X, + dmat=design_loc, + constraints=constraints_loc, + size_factors=size_factors, + link_fn=link_fn, + inv_link_fn=inv_link_fn + ) + +#Todo +# def closedform_nb_glm_logphi( +# X: Union[xr.DataArray, SparseXArrayDataArray], +# design_scale: xr.DataArray, +# constraints=None, +# size_factors=None, +# groupwise_means=None, +# link_fn=np.log +# ): +# r""" +# Calculates a closed-form solution for the log-scale parameters of negative-binomial GLMs. +# Based on the Method-of-Moments estimator. +# +# :param X: The sample data +# :param design_scale: design matrix for scale +# :param constraints: some design constraints +# :param size_factors: size factors for X +# :param groupwise_means: optional, in case if already computed this can be specified to spare double-calculation +# :return: tuple (groupwise_scales, logphi, rmsd) +# """ +# +# def compute_scales_fun(variance, mean): +# denominator = np.fmax(variance - mean, np.sqrt(np.nextafter(0, 1, dtype=variance.dtype))) +# groupwise_scales = np.square(mean) / denominator +# return groupwise_scales +# +# return closedform_glm_scale( +# X=X, +# design_scale=design_scale, +# constraints=constraints, +# size_factors=size_factors, +# groupwise_means=groupwise_means, +# link_fn=link_fn, +# compute_scales_fun=compute_scales_fun +# ) diff --git a/batchglm/models/glm_beta/utils.py b/batchglm/models/glm_beta/utils.py index 900735e6..a3c8592d 100644 --- a/batchglm/models/glm_beta/utils.py +++ b/batchglm/models/glm_beta/utils.py @@ -35,7 +35,6 @@ def closedform_beta_glm_logitmean( dmat=design_loc, constraints=constraints_loc, size_factors=size_factors, - weights=None, link_fn=link_fn, inv_link_fn=inv_link_fn ) diff --git a/batchglm/utils/random.py b/batchglm/utils/random.py index 1a6c6d54..af70dcaf 100644 --- a/batchglm/utils/random.py +++ b/batchglm/utils/random.py @@ -197,4 +197,29 @@ def sample(self, size=None): b=self.q, size=size ) + return random_data + + +class Bernoulli: + r""" + Bernoulli distribution. + """ + + p: np.ndarray + + def __init__(self, mean): + self.p=mean + + def sample(self, size=None): + """ + Sample from all distributions data of size `size`. 
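+        Each entry is drawn via ``np.random.binomial(n=1, p=self.p)``.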
+ :param size: The size + :return: numpy array containing sampled data + + """ + random_data = np.random.binomial( + n=1, + p=self.p, + size=size + ) return random_data \ No newline at end of file From cdd51a03734f038b5f1ced160fa8d9e54d324a64 Mon Sep 17 00:00:00 2001 From: ina258 Date: Tue, 26 Mar 2019 15:26:51 +0100 Subject: [PATCH 02/12] one small fix and output for (not yet finished) debugging of gradient --- batchglm/models/glm_beta/simulator.py | 4 ++-- batchglm/train/tf/base_glm_all/estimator_graph.py | 5 ++++- batchglm/train/tf/glm_beta/estimator.py | 9 +++++++-- batchglm/train/tf/glm_beta/model.py | 10 ++++++++-- batchglm/unit_test/glm_all/test_jacobians_glm_all.py | 4 ++++ 5 files changed, 25 insertions(+), 7 deletions(-) diff --git a/batchglm/models/glm_beta/simulator.py b/batchglm/models/glm_beta/simulator.py index d7a9dd18..7d9143d7 100644 --- a/batchglm/models/glm_beta/simulator.py +++ b/batchglm/models/glm_beta/simulator.py @@ -26,8 +26,8 @@ def generate_params( self, rand_fn_ave=lambda shape: np.random.uniform(0.2, 0.8, shape), rand_fn=None, - rand_fn_loc=lambda shape: np.random.uniform(0.05, 0.15, shape), - rand_fn_scale=lambda shape: np.random.uniform(1e5, 2*1e5, shape), + rand_fn_loc=lambda shape: np.random.uniform(0.5, 0.6, shape), + rand_fn_scale=lambda shape: np.random.uniform(1e1, 2*1e1, shape), ): self._generate_params( self, diff --git a/batchglm/train/tf/base_glm_all/estimator_graph.py b/batchglm/train/tf/base_glm_all/estimator_graph.py index 5cea4784..e7545a68 100644 --- a/batchglm/train/tf/base_glm_all/estimator_graph.py +++ b/batchglm/train/tf/base_glm_all/estimator_graph.py @@ -532,7 +532,10 @@ def __init__( self.hessians = self.full_data_model.hessians_final self.fisher_inv = op_utils.pinv(-self.full_data_model.hessians_final) # TODO switch for fim? 
# Summary statistics on feature-wise model gradients: - self.gradients = tf.reduce_sum(tf.abs(self.full_data_model.neg_jac_final / num_observations), axis=1) + a = tf.abs(self.full_data_model.neg_jac_final / num_observations) + b = tf.print(a) + with tf.control_dependencies([b]): + self.gradients = tf.reduce_sum(a, axis=1) with tf.name_scope('summaries'): if extended_summary: diff --git a/batchglm/train/tf/glm_beta/estimator.py b/batchglm/train/tf/glm_beta/estimator.py index f642bb25..3a9563ef 100644 --- a/batchglm/train/tf/glm_beta/estimator.py +++ b/batchglm/train/tf/glm_beta/estimator.py @@ -116,6 +116,11 @@ def __init__( init_a = init_a.astype(dtype) init_b = init_b.astype(dtype) + print("init_a") + print(init_a) + print("init_b") + print(init_b) + if len(optim_algos) > 0: if np.any([x.lower() in ["nr", "nr_tr"] for x in optim_algos]): provide_hessian = True @@ -210,7 +215,7 @@ def init_par( init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) self._train_loc = True - logger.debug("Using all_zero initialization for mean") + logging.getLogger("batchglm").debug("Using all zero initialization for mean") else: raise ValueError("init_a string %s not recognized" % init_a) logging.getLogger("batchglm").debug("Should train mean: %s", self._train_loc) @@ -224,7 +229,7 @@ def init_par( design_scale=input_data.design_scale[:, [0]], constraints=input_data.constraints_scale[[0], [0]].values, size_factors=size_factors_init, - groupwise_means=groupwise_means, + groupwise_means=None, link_fn=lambda samplesize: np.log(self.np_clip_param(samplesize, "samplesize")) ) init_b = np.zeros([input_data.num_scale_params, input_data.X.shape[1]]) diff --git a/batchglm/train/tf/glm_beta/model.py b/batchglm/train/tf/glm_beta/model.py index a647f1d6..ed9c9631 100644 --- a/batchglm/train/tf/glm_beta/model.py +++ b/batchglm/train/tf/glm_beta/model.py @@ -107,8 +107,14 @@ def __init__( - tf.lgamma(one_minus_loc * model_scale)\ + (model_scale * model_loc - 1) * tf.log(Xdense)\ + (one_minus_loc * model_scale - 1) * tf.log(one_minus_X) - - log_probs = self.tf_clip_param(log_probs, "log_probs") + a = tf.print("log_probs: \n", log_probs) + b = tf.print("model_loc: \n", model_loc) + c = tf.print("model_scale: \n", model_scale) + d = tf.print("X: \n", X) + e = tf.print("a_var: \n", a_var) + f = tf.print("eta_loc: \n", eta_loc) + with tf.control_dependencies([a, b, c, d, e, f]): + log_probs = self.tf_clip_param(log_probs, "log_probs") # Variance: sigma2 = (model_loc * one_minus_loc) / (1 + model_scale) diff --git a/batchglm/unit_test/glm_all/test_jacobians_glm_all.py b/batchglm/unit_test/glm_all/test_jacobians_glm_all.py index dde1a746..e1c6588b 100644 --- a/batchglm/unit_test/glm_all/test_jacobians_glm_all.py +++ b/batchglm/unit_test/glm_all/test_jacobians_glm_all.py @@ -132,6 +132,10 @@ def compare_jacs( t1_tf = time.time() t_tf = t1_tf - t0_tf + + print("J_analytic: ", J_analytic) + print("J_tf: ", J_tf) + # Make sure that jacobians are not all zero which might make evaluation of equality difficult. 
assert np.sum(np.abs(J_analytic)) > 1e-10, \ "jacobians too small to perform test: %f" % np.sum(np.abs(J_analytic)) From 3e1a27c947bb0ff94074f79cac20944143cb5900 Mon Sep 17 00:00:00 2001 From: ina258 Date: Wed, 27 Mar 2019 13:12:48 +0100 Subject: [PATCH 03/12] wrote tf part for bernoulli, graph-test only working for sparse=False --- batchglm/api/models/glm_bern.py | 2 +- batchglm/models/glm_bern/model.py | 4 +- batchglm/models/glm_bern/simulator.py | 4 +- batchglm/models/glm_bern/utils.py | 40 +-- batchglm/train/tf/base_glm_all/estimator.py | 4 + .../train/tf/base_glm_all/estimator_graph.py | 6 + .../train/tf/base_glm_all/external_bern.py | 6 + .../tf/base_glm_all/reducible_tensors.py | 2 + batchglm/train/tf/glm_bern/__init__.py | 7 + batchglm/train/tf/glm_bern/estimator.py | 241 ++++++++++++++++++ batchglm/train/tf/glm_bern/estimator_graph.py | 12 + batchglm/train/tf/glm_bern/external.py | 20 ++ batchglm/train/tf/glm_bern/fim.py | 26 ++ batchglm/train/tf/glm_bern/hessians.py | 37 +++ batchglm/train/tf/glm_bern/jacobians.py | 35 +++ batchglm/train/tf/glm_bern/model.py | 133 ++++++++++ .../train/tf/glm_bern/reducible_tensors.py | 13 + .../train/tf/glm_bern/training_strategies.py | 27 ++ .../unit_test/glm_all/test_graph_glm_all.py | 31 +++ 19 files changed, 607 insertions(+), 43 deletions(-) create mode 100644 batchglm/train/tf/base_glm_all/external_bern.py create mode 100644 batchglm/train/tf/glm_bern/__init__.py create mode 100644 batchglm/train/tf/glm_bern/estimator.py create mode 100644 batchglm/train/tf/glm_bern/estimator_graph.py create mode 100644 batchglm/train/tf/glm_bern/external.py create mode 100644 batchglm/train/tf/glm_bern/fim.py create mode 100644 batchglm/train/tf/glm_bern/hessians.py create mode 100644 batchglm/train/tf/glm_bern/jacobians.py create mode 100644 batchglm/train/tf/glm_bern/model.py create mode 100644 batchglm/train/tf/glm_bern/reducible_tensors.py create mode 100644 batchglm/train/tf/glm_bern/training_strategies.py diff --git a/batchglm/api/models/glm_bern.py b/batchglm/api/models/glm_bern.py index deba28e0..94736a89 100644 --- a/batchglm/api/models/glm_bern.py +++ b/batchglm/api/models/glm_bern.py @@ -1,2 +1,2 @@ from batchglm.models.glm_bern import InputData, Model, Simulator -#from batchglm.train.tf.glm_bern import Estimator \ No newline at end of file +from batchglm.train.tf.glm_bern import Estimator \ No newline at end of file diff --git a/batchglm/models/glm_bern/model.py b/batchglm/models/glm_bern/model.py index bc35123e..748ab9ba 100644 --- a/batchglm/models/glm_bern/model.py +++ b/batchglm/models/glm_bern/model.py @@ -32,10 +32,10 @@ def inverse_link_loc(self, data): return 1/(1+np.exp(-data)) def link_scale(self, data): - return np.log(data) + return data def inverse_link_scale(self, data): - return np.exp(data) + return data @property def eta_loc(self) -> xr.DataArray: diff --git a/batchglm/models/glm_bern/simulator.py b/batchglm/models/glm_bern/simulator.py index c59c38a3..2b2c938a 100644 --- a/batchglm/models/glm_bern/simulator.py +++ b/batchglm/models/glm_bern/simulator.py @@ -6,8 +6,8 @@ class Simulator(_Simulator_GLM, Model): """ - Simulator for Generalized Linear Models (GLMs) with negative binomial noise. - Uses the natural logarithm as linker function. + Simulator for Generalized Linear Models (GLMs) with bernoulli noise. + Uses logit linker function. 
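+    The scale model is unused for the Bernoulli noise model; scale parameters are simulated as zeros.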
""" def __init__( diff --git a/batchglm/models/glm_bern/utils.py b/batchglm/models/glm_bern/utils.py index 5fee189c..82533f62 100644 --- a/batchglm/models/glm_bern/utils.py +++ b/batchglm/models/glm_bern/utils.py @@ -3,7 +3,7 @@ import numpy as np import xarray as xr -from .external import closedform_glm_mean, closedform_glm_scale +from .external import closedform_glm_mean from .external import SparseXArrayDataArray @@ -34,40 +34,4 @@ def closedform_bern_glm_logitmu( size_factors=size_factors, link_fn=link_fn, inv_link_fn=inv_link_fn - ) - -#Todo -# def closedform_nb_glm_logphi( -# X: Union[xr.DataArray, SparseXArrayDataArray], -# design_scale: xr.DataArray, -# constraints=None, -# size_factors=None, -# groupwise_means=None, -# link_fn=np.log -# ): -# r""" -# Calculates a closed-form solution for the log-scale parameters of negative-binomial GLMs. -# Based on the Method-of-Moments estimator. -# -# :param X: The sample data -# :param design_scale: design matrix for scale -# :param constraints: some design constraints -# :param size_factors: size factors for X -# :param groupwise_means: optional, in case if already computed this can be specified to spare double-calculation -# :return: tuple (groupwise_scales, logphi, rmsd) -# """ -# -# def compute_scales_fun(variance, mean): -# denominator = np.fmax(variance - mean, np.sqrt(np.nextafter(0, 1, dtype=variance.dtype))) -# groupwise_scales = np.square(mean) / denominator -# return groupwise_scales -# -# return closedform_glm_scale( -# X=X, -# design_scale=design_scale, -# constraints=constraints, -# size_factors=size_factors, -# groupwise_means=groupwise_means, -# link_fn=link_fn, -# compute_scales_fun=compute_scales_fun -# ) + ) \ No newline at end of file diff --git a/batchglm/train/tf/base_glm_all/estimator.py b/batchglm/train/tf/base_glm_all/estimator.py index 977ece1e..3b21219f 100644 --- a/batchglm/train/tf/base_glm_all/estimator.py +++ b/batchglm/train/tf/base_glm_all/estimator.py @@ -73,6 +73,8 @@ def __init__( from .external_norm import EstimatorGraph elif noise_model == "beta": from .external_beta import EstimatorGraph + elif noise_model == "bern": + from .external_bern import EstimatorGraph else: raise ValueError("noise model %s was not recognized" % noise_model) self.noise_model = noise_model @@ -356,6 +358,8 @@ def finalize(self): from .external_norm import EstimatorStoreXArray elif self.noise_model == "beta": from .external_beta import EstimatorStoreXArray + elif self.noise_model == "bern": + from .external_bern import EstimatorStoreXArray else: raise ValueError("noise model not recognized") diff --git a/batchglm/train/tf/base_glm_all/estimator_graph.py b/batchglm/train/tf/base_glm_all/estimator_graph.py index 5cea4784..20fb9ab9 100644 --- a/batchglm/train/tf/base_glm_all/estimator_graph.py +++ b/batchglm/train/tf/base_glm_all/estimator_graph.py @@ -66,6 +66,8 @@ def __init__( from .external_norm import ReducibleTensors elif noise_model == "beta": from .external_beta import ReducibleTensors + elif noise_model == "bern": + from .external_bern import ReducibleTensors else: raise ValueError("noise model not recognized") self.noise_model = noise_model @@ -252,6 +254,8 @@ def __init__( from .external_norm import ReducibleTensors elif noise_model == "beta": from .external_beta import ReducibleTensors + elif noise_model == "bern": + from .external_bern import ReducibleTensors else: raise ValueError("noise model not recognized") self.noise_model = noise_model @@ -433,6 +437,8 @@ def __init__( from .external_norm import ModelVars elif 
noise_model == "beta": from .external_beta import ModelVars + elif noise_model == "bern": + from .external_bern import ModelVars else: raise ValueError("noise model not recognized") self.noise_model = noise_model diff --git a/batchglm/train/tf/base_glm_all/external_bern.py b/batchglm/train/tf/base_glm_all/external_bern.py new file mode 100644 index 00000000..6d8e071a --- /dev/null +++ b/batchglm/train/tf/base_glm_all/external_bern.py @@ -0,0 +1,6 @@ +from batchglm.train.tf.glm_bern import EstimatorGraph +from batchglm.train.tf.glm_bern import BasicModelGraph, ModelVars, ProcessModel +from batchglm.train.tf.glm_bern import Hessians, FIM, Jacobians, ReducibleTensors + +from batchglm.models.glm_bern import AbstractEstimator, EstimatorStoreXArray, InputData, Model +from batchglm.models.glm_bern.utils import closedform_bern_glm_logitmu \ No newline at end of file diff --git a/batchglm/train/tf/base_glm_all/reducible_tensors.py b/batchglm/train/tf/base_glm_all/reducible_tensors.py index bbd9461f..dbe20689 100644 --- a/batchglm/train/tf/base_glm_all/reducible_tensors.py +++ b/batchglm/train/tf/base_glm_all/reducible_tensors.py @@ -37,6 +37,8 @@ def assemble_tensors(self, idx, data): from .external_norm import BasicModelGraph elif self.noise_model == "beta": from .external_beta import BasicModelGraph + elif self.noise_model == "bern": + from .external_bern import BasicModelGraph else: raise ValueError("noise model %s was not recognized" % self.noise_model) diff --git a/batchglm/train/tf/glm_bern/__init__.py b/batchglm/train/tf/glm_bern/__init__.py new file mode 100644 index 00000000..4db081bb --- /dev/null +++ b/batchglm/train/tf/glm_bern/__init__.py @@ -0,0 +1,7 @@ +from .estimator import Estimator +from .estimator_graph import EstimatorGraph +from .model import BasicModelGraph, ModelVars, ProcessModel +from .hessians import Hessians +from .fim import FIM +from .jacobians import Jacobians +from .reducible_tensors import ReducibleTensors diff --git a/batchglm/train/tf/glm_bern/estimator.py b/batchglm/train/tf/glm_bern/estimator.py new file mode 100644 index 00000000..57534d34 --- /dev/null +++ b/batchglm/train/tf/glm_bern/estimator.py @@ -0,0 +1,241 @@ +import logging +from typing import Union + +import numpy as np +import tensorflow as tf + +from .external import AbstractEstimator, EstimatorAll, ESTIMATOR_PARAMS, InputData, Model +from .external import closedform_bern_glm_logitmu +from .external import SparseXArrayDataArray +from .estimator_graph import EstimatorGraph +from .model import ProcessModel +from .training_strategies import TrainingStrategies + +logger = logging.getLogger("batchglm") + + +class Estimator(EstimatorAll, AbstractEstimator, ProcessModel): + """ + Estimator for Generalized Linear Models (GLMs) with bernoulli noise. + Uses a logit linker function. + """ + + def __init__( + self, + input_data: InputData, + batch_size: int = 500, + graph: tf.Graph = None, + init_model: Model = None, + init_a: Union[np.ndarray, str] = "AUTO", + init_b: Union[np.ndarray, str] = "AUTO", + quick_scale: bool = False, + model: EstimatorGraph = None, + provide_optimizers: dict = { + "gd": True, + "adam": True, + "adagrad": True, + "rmsprop": True, + "nr": True, + "nr_tr": True, + "irls": True, + "irls_gd": True, + "irls_tr": True, + "irls_gd_tr": True, + }, + provide_batched: bool = False, + provide_fim: bool = False, + provide_hessian: bool = False, + optim_algos: list = [], + extended_summary=False, + dtype="float64" + ): + """ + Performs initialisation and creates a new estimator. 
+ + :param input_data: InputData + The input data + :param batch_size: int + Size of mini-batches used. + :param graph: (optional) tf.Graph + :param init_model: (optional) + If provided, this model will be used to initialize this Estimator. + :param init_a: (Optional) + Low-level initial values for a. Can be: + + - str: + * "auto": automatically choose best initialization + * "random": initialize with random values + * "standard": initialize intercept with observed mean + * "init_model": initialize with another model (see `ìnit_model` parameter) + * "closed_form": try to initialize with closed form + - np.ndarray: direct initialization of 'a' + :param init_b: (won't be used) + :param quick_scale: (won't be used) + :param model: EstimatorGraph + EstimatorGraph to use. Basically for debugging. + :param provide_optimizers: + + E.g. {"gd": False, "adam": False, "adagrad": False, "rmsprop": False, + "nr": False, "nr_tr": True, + "irls": False, "irls_gd": False, "irls_tr": False, "irls_gd_tr": False} + :param provide_batched: bool + Whether mini-batched optimizers should be provided. + :param provide_fim: Whether to compute fisher information matrix during training + Either supply provide_fim and provide_hessian or optim_algos. + :param provide_hessian: Whether to compute hessians during training + Either supply provide_fim and provide_hessian or optim_algos. + :param optim_algos: Algorithms that you want to use on this object. Depending on that, + the hessian and/or fisher information matrix are computed. + Either supply provide_fim and provide_hessian or optim_algos. + :param extended_summary: Include detailed information in the summaries. + Will increase runtime of summary writer, use only for debugging. + :param dtype: Precision used in tensorflow. + """ + self.TrainingStrategies = TrainingStrategies + + self._input_data = input_data + self._train_loc = True + self._train_scale = True + + (init_a, init_b) = self.init_par( + input_data=input_data, + init_a=init_a, + init_b=init_b, + init_model=init_model + ) + init_a = init_a.astype(dtype) + init_b = init_b.astype(dtype) + if quick_scale: + self._train_scale = False + + print("init_a: \n \n", init_a) + print("init_b: \n \n", init_b) + + if len(optim_algos) > 0: + if np.any([x.lower() in ["nr", "nr_tr"] for x in optim_algos]): + provide_hessian = True + if np.any([x.lower() in ["irls", "irls_tr", "irls_gd", "irls_gd_tr"] for x in optim_algos]): + provide_fim = True + + EstimatorAll.__init__( + self=self, + input_data=input_data, + batch_size=batch_size, + graph=graph, + init_a=init_a, + init_b=init_b, + model=model, + provide_optimizers=provide_optimizers, + provide_batched=provide_batched, + provide_fim=provide_fim, + provide_hessian=provide_hessian, + extended_summary=extended_summary, + noise_model="bern", + dtype=dtype + ) + + @classmethod + def param_shapes(cls) -> dict: + return ESTIMATOR_PARAMS + + def init_par( + self, + input_data, + init_a, + init_b, + init_model + ): + r""" + standard: + Only initialise intercept and keep other coefficients as zero. 
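+            For the Bernoulli model this sets the intercept to the logit of the observed overall mean.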
+ + closed-form: + Initialize with Maximum Likelihood / Maximum of Momentum estimators + + Idea: + $$ + \theta &= f(x) \\ + \Rightarrow f^{-1}(\theta) &= x \\ + &= (D \cdot D^{+}) \cdot x \\ + &= D \cdot (D^{+} \cdot x) \\ + &= D \cdot x' = f^{-1}(\theta) + $$ + """ + + size_factors_init = input_data.size_factors + if size_factors_init is not None: + size_factors_init = np.expand_dims(size_factors_init, axis=1) + size_factors_init = np.broadcast_to( + array=size_factors_init, + shape=[input_data.num_observations, input_data.num_features] + ) + + if init_model is None: + groupwise_means = None + init_a_str = None + if isinstance(init_a, str): + init_a_str = init_a.lower() + # Chose option if auto was chosen + if init_a.lower() == "auto": + init_a = "closed_form" + + if init_a.lower() == "closed_form": + groupwise_means, init_a, rmsd_a = closedform_bern_glm_logitmu( + X=input_data.X, + design_loc=input_data.design_loc, + constraints_loc=input_data.constraints_loc.values, + size_factors=size_factors_init, + link_fn=lambda mu: np.log(self.np_clip_param(mu, "mu")/(1-self.np_clip_param(mu, "mu"))) + ) + + # train mu, if the closed-form solution is inaccurate + self._train_loc = not (np.all(rmsd_a == 0) or rmsd_a.size == 0) + + if input_data.size_factors is not None: + if np.any(input_data.size_factors != 1): + self._train_loc = True + + logger.debug("Using closed-form MLE initialization for mean") + logger.debug("Should train mu: %s", self._train_loc) + elif init_a.lower() == "standard": + if isinstance(input_data.X, SparseXArrayDataArray): + overall_means = input_data.X.mean(dim="observations") + else: + overall_means = input_data.X.mean(dim="observations").values # directly calculate the mean + overall_means = self.np_clip_param(overall_means, "mu") + + init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) + init_a[0, :] = np.log(overall_means/(1-overall_means)) + self._train_loc = True + + logger.debug("Using standard initialization for mean") + logger.debug("Should train mu: %s", self._train_loc) + elif init_a.lower() == "all_zero": + init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) + self._train_loc = True + + logger.debug("Using all_zero initialization for mean") + logger.debug("Should train mu: %s", self._train_loc) + else: + raise ValueError("init_a string %s not recognized" % init_a) + + else: + if isinstance(init_a, str) and (init_a.lower() == "auto" or init_a.lower() == "init_model"): + my_loc_names = set(input_data.loc_names.values) + my_loc_names = my_loc_names.intersection(set(init_model.input_data.loc_names.values)) + + init_loc = np.zeros([input_data.num_loc_params, input_data.num_features]) + for parm in my_loc_names: + init_idx = np.where(init_model.input_data.loc_names == parm)[0] + my_idx = np.where(input_data.loc_names == parm)[0] + init_loc[my_idx] = init_model.a_var[init_idx] + + init_a = init_loc + logger.debug("Using initialization based on input model for mean") + init_b = np.zeros([input_data.num_scale_params, input_data.X.shape[1]]) + + return init_a, init_b + + @property + def input_data(self) -> InputData: + return self._input_data diff --git a/batchglm/train/tf/glm_bern/estimator_graph.py b/batchglm/train/tf/glm_bern/estimator_graph.py new file mode 100644 index 00000000..8e609600 --- /dev/null +++ b/batchglm/train/tf/glm_bern/estimator_graph.py @@ -0,0 +1,12 @@ +import logging + +from .model import ProcessModel +from .external import EstimatorGraphAll + +logger = logging.getLogger(__name__) + + +class 
EstimatorGraph(ProcessModel, EstimatorGraphAll): + """ + Full class. + """ diff --git a/batchglm/train/tf/glm_bern/external.py b/batchglm/train/tf/glm_bern/external.py new file mode 100644 index 00000000..3fcd24ee --- /dev/null +++ b/batchglm/train/tf/glm_bern/external.py @@ -0,0 +1,20 @@ +import batchglm.data as data_utils + +from batchglm.models.base.input import SparseXArrayDataSet, SparseXArrayDataArray +from batchglm.models.glm_bern import AbstractEstimator, EstimatorStoreXArray, InputData, Model +from batchglm.models.base_glm.utils import closedform_glm_mean, closedform_glm_scale +from batchglm.models.glm_bern.utils import closedform_bern_glm_logitmu + +import batchglm.train.tf.ops as op_utils +import batchglm.train.tf.train as train_utils +from batchglm.train.tf.base import TFEstimatorGraph, MonitoredTFEstimator + +from batchglm.train.tf.base_glm import GradientGraphGLM, NewtonGraphGLM, TrainerGraphGLM, EstimatorGraphGLM, FullDataModelGraphGLM, BasicModelGraphGLM +from batchglm.train.tf.base_glm import ESTIMATOR_PARAMS, ProcessModelGLM, ModelVarsGLM +from batchglm.train.tf.base_glm import HessiansGLM, FIMGLM, JacobiansGLM + +from batchglm.train.tf.base_glm_all import EstimatorAll, EstimatorGraphAll, FIMGLMALL, HessianGLMALL, JacobiansGLMALL, ReducableTensorsGLMALL + +import batchglm.utils.random as rand_utils +from batchglm.utils.linalg import groupwise_solve_lm +from batchglm import pkg_constants diff --git a/batchglm/train/tf/glm_bern/fim.py b/batchglm/train/tf/glm_bern/fim.py new file mode 100644 index 00000000..c0d20183 --- /dev/null +++ b/batchglm/train/tf/glm_bern/fim.py @@ -0,0 +1,26 @@ +import tensorflow as tf + +import logging + +from .external import FIMGLMALL + +logger = logging.getLogger(__name__) + + +class FIM(FIMGLMALL): + + def _weight_fim_aa( + self, + loc, + scale + ): + const = - loc * (1-loc) + + return const + + def _weight_fim_bb( + self, + loc, + scale + ): + return tf.zeros_like(loc) diff --git a/batchglm/train/tf/glm_bern/hessians.py b/batchglm/train/tf/glm_bern/hessians.py new file mode 100644 index 00000000..5362cf0a --- /dev/null +++ b/batchglm/train/tf/glm_bern/hessians.py @@ -0,0 +1,37 @@ +import tensorflow as tf + +import logging + +from .external import HessianGLMALL + +logger = logging.getLogger(__name__) + + +class Hessians(HessianGLMALL): + + def _weight_hessian_ab( + self, + X, + loc, + scale, + ): + return tf.zeros_like(loc) + + def _weight_hessian_aa( + self, + X, + loc, + scale, + ): + const = - loc * (1-loc) + return const + + def _weight_hessian_bb( + self, + X, + loc, + scale, + ): + return tf.zeros_like(loc) + + diff --git a/batchglm/train/tf/glm_bern/jacobians.py b/batchglm/train/tf/glm_bern/jacobians.py new file mode 100644 index 00000000..be92bd80 --- /dev/null +++ b/batchglm/train/tf/glm_bern/jacobians.py @@ -0,0 +1,35 @@ +import logging + +import tensorflow as tf + +from .external import JacobiansGLMALL + +logger = logging.getLogger(__name__) + + +class Jacobians(JacobiansGLMALL): + + def _weights_jac_a( + self, + X, + loc, + scale, + ): + if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): + one_minus_X = - tf.sparse.add(X, -1) + Xdense = tf.sparse.to_dense(X) + else: + one_minus_X = 1 - X + Xdense = X + + const = Xdense*(1-loc) - (one_minus_X)*loc + + return const + + def _weights_jac_b( + self, + X, + loc, + scale, + ): + return tf.zeros_like(loc) diff --git a/batchglm/train/tf/glm_bern/model.py b/batchglm/train/tf/glm_bern/model.py new file mode 100644 index 00000000..398ce5e2 --- /dev/null +++ 
b/batchglm/train/tf/glm_bern/model.py @@ -0,0 +1,133 @@ +import logging + +import tensorflow as tf + +import numpy as np + +from .external import ProcessModelGLM, ModelVarsGLM, BasicModelGraphGLM +from .external import pkg_constants + +logger = logging.getLogger(__name__) + + +class ProcessModel(ProcessModelGLM): + + def param_bounds( + self, + dtype + ): + if isinstance(dtype, tf.DType): + dmin = dtype.min + dmax = dtype.max + dtype = dtype.as_numpy_dtype + else: + dtype = np.dtype(dtype) + dmin = np.finfo(dtype).min + dmax = np.finfo(dtype).max + dtype = dtype.type + + zero = np.nextafter(0, np.inf, dtype=dtype) + one = np.nextafter(1, -np.inf, dtype=dtype) + + sf = dtype(pkg_constants.ACCURACY_MARGIN_RELATIVE_TO_LIMIT) + bounds_min = { + "a_var": np.log(zero/(1-zero)) / sf, + "b_var": np.log(zero) / sf, + "eta_loc": np.log(zero/(1-zero)) / sf, + "eta_scale": np.log(zero) / sf, + "mu": np.nextafter(0, np.inf, dtype=dtype), + "r": np.nextafter(0, np.inf, dtype=dtype), + "probs": dtype(0), + "log_probs": np.log(zero), + } + bounds_max = { + "a_var": np.log(one/(1-one)) / sf, + "b_var": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, + "eta_loc": np.log(one/(1-one)) / sf, + "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, + "mu": one, + "r": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, + "probs": dtype(1), + "log_probs": dtype(0), + } + return bounds_min, bounds_max + + +class ModelVars(ProcessModel, ModelVarsGLM): + """ + Full class. + """ + + +class BasicModelGraph(ProcessModel, BasicModelGraphGLM): + + def __init__( + self, + X, + design_loc, + design_scale, + constraints_loc, + constraints_scale, + a_var, + b_var, + dtype, + size_factors=None + ): + a_var = self.tf_clip_param(a_var, "a_var") + b_var = self.tf_clip_param(b_var, "b_var") + + if constraints_loc is not None: + eta_loc = tf.matmul(design_loc, tf.matmul(constraints_loc, a_var)) + else: + eta_loc = tf.matmul(design_loc, a_var) + + if size_factors is not None: + eta_loc = tf.add(eta_loc, tf.log(size_factors)) + + eta_loc = self.tf_clip_param(eta_loc, "eta_loc") + + if constraints_scale is not None: + eta_scale = tf.matmul(design_scale, tf.matmul(constraints_scale, b_var)) + else: + eta_scale = tf.matmul(design_scale, b_var) + + eta_scale = self.tf_clip_param(eta_scale, "eta_scale") + + # Inverse linker functions: + model_loc = 1 / (1 + tf.exp(-eta_loc)) + model_scale = eta_scale + + # Log-likelihood: + if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): + one_minus_X = -tf.sparse.add(X, -1) + Xdense = tf.sparse.to_dense(X) + else: + one_minus_X = 1 - X + Xdense = X + + log_probs = Xdense*tf.log(model_loc) + (one_minus_X)*tf.log(1 - model_loc) + log_probs = self.tf_clip_param(log_probs, "log_probs") + + # Variance: + sigma2 = model_loc*(1-model_loc) + + self.X = X + self.design_loc = design_loc + self.design_scale = design_scale + self.constraints_loc = constraints_loc + self.constraints_scale = constraints_scale + self.a_var = a_var + self.b_var = b_var + self.size_factors = size_factors + self.dtype = dtype + + self.eta_loc = eta_loc + self.eta_scale = eta_scale + self.model_loc = model_loc + self.model_scale = model_scale + self.mu = model_loc + self.r = model_scale + + self.log_probs = log_probs + + self.sigma2 = sigma2 \ No newline at end of file diff --git a/batchglm/train/tf/glm_bern/reducible_tensors.py b/batchglm/train/tf/glm_bern/reducible_tensors.py new file mode 100644 index 00000000..862ccaf8 --- /dev/null +++ b/batchglm/train/tf/glm_bern/reducible_tensors.py @@ -0,0 
+1,13 @@ +import logging + +from .external import ReducableTensorsGLMALL +from .hessians import Hessians +from .jacobians import Jacobians +from .fim import FIM + +logger = logging.getLogger("batchglm") + + +class ReducibleTensors(Jacobians, Hessians, FIM, ReducableTensorsGLMALL): + """ + """ diff --git a/batchglm/train/tf/glm_bern/training_strategies.py b/batchglm/train/tf/glm_bern/training_strategies.py new file mode 100644 index 00000000..d9e57377 --- /dev/null +++ b/batchglm/train/tf/glm_bern/training_strategies.py @@ -0,0 +1,27 @@ +from enum import Enum + + +class TrainingStrategies(Enum): + + AUTO = None + DEFAULT = [ + { + "convergence_criteria": "all_converged", + "use_batching": False, + "optim_algo": "irls_gd_tr", + }, + ] + IRLS = [ + { + "convergence_criteria": "all_converged", + "use_batching": False, + "optim_algo": "irls_gd_tr", + }, + ] + IRLS_BATCHED = [ + { + "convergence_criteria": "all_converged", + "use_batching": True, + "optim_algo": "irls_gd_tr", + }, + ] diff --git a/batchglm/unit_test/glm_all/test_graph_glm_all.py b/batchglm/unit_test/glm_all/test_graph_glm_all.py index 0d9aebdf..3f476815 100644 --- a/batchglm/unit_test/glm_all/test_graph_glm_all.py +++ b/batchglm/unit_test/glm_all/test_graph_glm_all.py @@ -32,6 +32,8 @@ def __init__( from batchglm.api.models.glm_norm import Estimator, InputData elif noise_model=="beta": from batchglm.api.models.glm_beta import Estimator, InputData + elif noise_model=="bern": + from batchglm.api.models.glm_bern import Estimator, InputData else: raise ValueError("noise_model not recognized") @@ -103,6 +105,8 @@ def get_simulator(self): from batchglm.api.models.glm_norm import Simulator elif self.noise_model=="beta": from batchglm.api.models.glm_beta import Simulator + elif self.noise_model=="bern": + from batchglm.api.models.glm_bern import Simulator else: raise ValueError("noise_model not recognized") @@ -221,5 +225,32 @@ def test_batched_beta(self): self._test_batched(sparse=True) +class Test_Graph_GLM_BERN( + Test_Graph_GLM_ALL, + unittest.TestCase +): + """ + Test whether training graphs work for bernoulli noise. 
+ """ + + def test_full_bern(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logger.error("Test_Graph_GLM_BERN.test_full_bern()") + + self.noise_model = "bern" + self._test_full(sparse=False) + self._test_full(sparse=True) + + def test_batched_bern(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logger.error("Test_Graph_GLM_BERN.test_batched_bern()") + + self.noise_model = "bern" + self._test_batched(sparse=False) + self._test_batched(sparse=True) + + if __name__ == '__main__': unittest.main() From 64f214193efdb46926cb4459d6d1f42a87141898 Mon Sep 17 00:00:00 2001 From: ina258 Date: Wed, 27 Mar 2019 18:31:23 +0100 Subject: [PATCH 04/12] included beta and bern in unittests --- .../train/tf/base_glm_all/estimator_graph.py | 2 +- .../glm_all/test_acc_analytic_glm_all_2.py | 135 +++++++++++++++++- .../glm_all/test_hessians_glm_all.py | 19 +++ .../glm_all/test_jacobians_glm_all.py | 18 +++ 4 files changed, 166 insertions(+), 8 deletions(-) diff --git a/batchglm/train/tf/base_glm_all/estimator_graph.py b/batchglm/train/tf/base_glm_all/estimator_graph.py index 205342d6..9b360cd7 100644 --- a/batchglm/train/tf/base_glm_all/estimator_graph.py +++ b/batchglm/train/tf/base_glm_all/estimator_graph.py @@ -539,7 +539,7 @@ def __init__( self.fisher_inv = op_utils.pinv(-self.full_data_model.hessians_final) # TODO switch for fim? # Summary statistics on feature-wise model gradients: a = tf.abs(self.full_data_model.neg_jac_final / num_observations) - b = tf.print(a) + b = tf.print("gradients: \n", a) with tf.control_dependencies([b]): self.gradients = tf.reduce_sum(a, axis=1) diff --git a/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py b/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py index ef48beb3..e95bbc0d 100644 --- a/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py +++ b/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py @@ -36,6 +36,10 @@ def __init__( from batchglm.api.models.glm_nb import Estimator, InputData elif noise_model=="norm": from batchglm.api.models.glm_norm import Estimator, InputData + elif noise_model=="beta": + from batchglm.api.models.glm_beta import Estimator, InputData + elif noise_model=="bern": + from batchglm.api.models.glm_bern import Estimator, InputData else: raise ValueError("noise_model not recognized") @@ -94,6 +98,12 @@ def eval_estimation_a( elif self.noise_model=="norm": threshold_dev = 1e-2 threshold_std = 1e-1 + elif self.noise_model=="beta": + threshold_dev = 1e-2 + threshold_std = 1e-1 + elif self.noise_model=="bern": + threshold_dev = 1e-2 + threshold_std = 1e-1 else: raise ValueError("noise_model not recognized") @@ -129,6 +139,12 @@ def eval_estimation_b( elif self.noise_model == "norm": threshold_dev = 1e-2 threshold_std = 1e-1 + elif self.noise_model == "beta": + threshold_dev = 1e-2 + threshold_std = 1e-1 + elif self.noise_model == "bern": + threshold_dev = 1e-2 + threshold_std = 1e-1 else: raise ValueError("noise_model not recognized") @@ -167,6 +183,10 @@ def get_simulator(self): from batchglm.api.models.glm_nb import Simulator elif self.noise_model=="norm": from batchglm.api.models.glm_norm import Simulator + elif self.noise_model=="beta": + from batchglm.api.models.glm_beta import Simulator + elif self.noise_model=="bern": + from batchglm.api.models.glm_bern import Simulator else: raise ValueError("noise_model not recognized") @@ -188,10 +208,33 @@ def get_estimator(self, 
train_scale, sparse, init_a, init_b): def simulate_complex(self): self.sim = self.get_simulator() self.sim.generate_sample_description(num_batches=1, num_conditions=2) + + if self.noise_model is None: + raise ValueError("noise_model is None") + else: + if self.noise_model=="nb": + rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) + rand_fn_loc = lambda shape: np.random.uniform(1, 3, shape) + rand_fn_scale = lambda shape: np.random.uniform(1, 3, shape) + elif self.noise_model=="norm": + rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) + rand_fn_loc = lambda shape: np.random.uniform(1, 3, shape) + rand_fn_scale = lambda shape: np.random.uniform(1, 3, shape) + elif self.noise_model=="beta": + rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) + rand_fn_loc = lambda shape: np.random.uniform(0.35, 0.45, shape) + rand_fn_scale = lambda shape: np.random.uniform(10, 30, shape) + elif self.noise_model=="bern": + rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) + rand_fn_loc = lambda shape: np.random.uniform(0.35, 0.45, shape) + rand_fn_scale = lambda shape: np.random.uniform(0, 0, shape) + else: + raise ValueError("noise_model not recognized") + self.sim.generate_params( - rand_fn_ave=lambda shape: np.random.uniform(1e5, 2 * 1e5, shape), - rand_fn_loc=lambda shape: np.random.uniform(1, 3, shape), - rand_fn_scale=lambda shape: np.random.uniform(1, 3, shape) + rand_fn_ave=rand_fn_ave, + rand_fn_loc=rand_fn_loc, + rand_fn_scale=rand_fn_scale ) self.sim.generate_data() @@ -199,14 +242,36 @@ def simulate_easy(self): self.sim = self.get_simulator() self.sim.generate_sample_description(num_batches=1, num_conditions=2) + if self.noise_model is None: + raise ValueError("noise_model is None") + else: + if self.noise_model=="nb": + rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) + rand_fn_loc = lambda shape: np.ones(shape) + rand_fn_scale = lambda shape: np.random.uniform(1, 3, shape) + elif self.noise_model=="norm": + rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) + rand_fn_loc = lambda shape: np.ones(shape) + rand_fn_scale = lambda shape: np.random.uniform(1, 3, shape) + elif self.noise_model=="beta": + rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) + rand_fn_loc = lambda shape: 0.5*np.ones(shape) + rand_fn_scale = lambda shape: np.random.uniform(10, 30, shape) + elif self.noise_model=="bern": + rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) + rand_fn_loc = lambda shape: 0.5*np.ones(shape) + rand_fn_scale = lambda shape: np.random.uniform(0, 0, shape) + else: + raise ValueError("noise_model not recognized") + def rand_fn_standard(shape): theta = np.ones(shape) theta[0, :] = np.random.uniform(5, 20, shape[1]) return theta self.sim.generate_params( - rand_fn_ave=lambda shape: np.random.uniform(1e5, 2 * 1e5, shape), - rand_fn_loc=lambda shape: np.ones(shape), + rand_fn_ave=rand_fn_ave, + rand_fn_loc=rand_fn_loc, rand_fn_scale=lambda shape: rand_fn_standard(shape) ) self.sim.generate_data() @@ -251,7 +316,7 @@ class Test_AccuracyAnalytic_GLM_NB( """ def test_a_closed_b_closed(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR), + logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.INFO) logger.error("Test_AccuracyAnalytic_GLM_NB.test_a_closed_b_closed()") @@ -280,7 +345,7 @@ class Test_AccuracyAnalytic_GLM_NORM( """ def test_a_closed_b_closed(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR), + 
logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.INFO) logger.error("Test_AccuracyAnalytic_GLM_NORM.test_a_closed_b_closed()") @@ -299,6 +364,62 @@ def test_a_standard_b_standard(self): self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") +class Test_AccuracyAnalytic_GLM_BETA( + Test_AccuracyAnalytic_GLM_ALL, + unittest.TestCase +): + """ + Test whether optimizers yield exact results for beta distributed noise. + """ + + def test_a_closed_b_closed(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.INFO) + logger.error("Test_AccuracyAnalytic_GLM_BETA.test_a_closed_b_closed()") + + self.noise_model = "beta" + self.simulate_complex() + self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") + self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") + + def test_a_standard_b_standard(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.INFO) + logger.error("Test_AccuracyAnalytic_GLM_BETA.test_a_standard_b_standard()") + + self.noise_model = "beta" + self.simulate_easy() + self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") + self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") + +class Test_AccuracyAnalytic_GLM_BERN( + Test_AccuracyAnalytic_GLM_ALL, + unittest.TestCase +): + """ + Test whether optimizers yield exact results for bernoulli distributed noise. + """ + + def test_a_closed_b_closed(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.INFO) + logger.error("Test_AccuracyAnalytic_GLM_BERN.test_a_closed_b_closed()") + + self.noise_model = "bern" + self.simulate_complex() + self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") + self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") + + def test_a_standard_b_standard(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.INFO) + logger.error("Test_AccuracyAnalytic_GLM_BERN.test_a_standard_b_standard()") + + self.noise_model = "bern" + self.simulate_easy() + self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") + self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") + if __name__ == '__main__': unittest.main() diff --git a/batchglm/unit_test/glm_all/test_hessians_glm_all.py b/batchglm/unit_test/glm_all/test_hessians_glm_all.py index bd09bc81..4d728aec 100644 --- a/batchglm/unit_test/glm_all/test_hessians_glm_all.py +++ b/batchglm/unit_test/glm_all/test_hessians_glm_all.py @@ -30,6 +30,8 @@ def simulate(self): from batchglm.api.models.glm_norm import Simulator elif self.noise_model == "beta": from batchglm.api.models.glm_beta import Simulator + elif self.noise_model == "bern": + from batchglm.api.models.glm_bern import Simulator else: raise ValueError("noise_model not recognized") @@ -53,6 +55,8 @@ def get_hessians( from batchglm.api.models.glm_norm import Estimator elif self.noise_model == "beta": from batchglm.api.models.glm_beta import Estimator + elif self.noise_model == "bern": + from batchglm.api.models.glm_bern import Estimator else: raise ValueError("noise_model not recognized") @@ -94,6 +98,8 @@ def _test_compute_hessians(self, sparse): from batchglm.api.models.glm_norm import Simulator, InputData elif 
self.noise_model == "beta": from batchglm.api.models.glm_beta import Simulator, InputData + elif self.noise_model == "bern": + from batchglm.api.models.glm_bern import Simulator, InputData else: raise ValueError("noise_model not recognized") @@ -193,6 +199,19 @@ def test_compute_hessians_beta(self): return True +class Test_Hessians_GLM_BERN(Test_Hessians_GLM_ALL, unittest.TestCase): + + def test_compute_hessians_bern(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logging.getLogger("batchglm").error("Test_Hessians_GLM_BERN.test_compute_hessians_bern()") + + self.noise_model = "bern" + self._test_compute_hessians(sparse=False) + #self._test_compute_hessians(sparse=False) # TODO tf>=1.13 waiting for tf.sparse.expand_dims to work + + return True + if __name__ == '__main__': unittest.main() diff --git a/batchglm/unit_test/glm_all/test_jacobians_glm_all.py b/batchglm/unit_test/glm_all/test_jacobians_glm_all.py index e1c6588b..a8d7fb50 100644 --- a/batchglm/unit_test/glm_all/test_jacobians_glm_all.py +++ b/batchglm/unit_test/glm_all/test_jacobians_glm_all.py @@ -30,6 +30,8 @@ def simulate(self): from batchglm.api.models.glm_norm import Simulator elif self.noise_model == "beta": from batchglm.api.models.glm_beta import Simulator + elif self.noise_model == "bern": + from batchglm.api.models.glm_bern import Simulator else: raise ValueError("noise_model not recognized") @@ -53,6 +55,8 @@ def get_jacs( from batchglm.api.models.glm_norm import Estimator elif self.noise_model == "beta": from batchglm.api.models.glm_beta import Estimator + elif self.noise_model == "bern": + from batchglm.api.models.glm_bern import Estimator else: raise ValueError("noise_model not recognized") @@ -98,6 +102,8 @@ def compare_jacs( from batchglm.api.models.glm_norm import InputData elif self.noise_model == "beta": from batchglm.api.models.glm_beta import InputData + elif self.noise_model == "bern": + from batchglm.api.models.glm_bern import InputData else: raise ValueError("noise_model not recognized") @@ -193,5 +199,17 @@ def test_compute_jacobians_beta(self): #self._test_compute_jacobians(sparse=True) #TODO automatic differentiation does not seem to work here yet. +class Test_Jacobians_GLM_BERN(Test_Jacobians_GLM_ALL, unittest.TestCase): + + def test_compute_jacobians_bern(self): + logging.getLogger("tensorflow").setLevel(logging.INFO) + logging.getLogger("batchglm").setLevel(logging.INFO) + logging.getLogger("batchglm").error("Test_Jacobians_GLM_BERN.test_compute_jacobians_bern()") + + self.noise_model = "bern" + self._test_compute_jacobians(sparse=False) + #self._test_compute_jacobians(sparse=True) #TODO automatic differentiation does not seem to work here yet. 
+ + if __name__ == '__main__': unittest.main() From 165a26b6d9402425d31b34a8e3655d3d3c12fb29 Mon Sep 17 00:00:00 2001 From: ina258 Date: Sat, 30 Mar 2019 12:29:45 +0100 Subject: [PATCH 05/12] fixed test for hessian --- .../glm_all/test_hessians_glm_all.py | 27 ++++++++++++++++--- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/batchglm/unit_test/glm_all/test_hessians_glm_all.py b/batchglm/unit_test/glm_all/test_hessians_glm_all.py index 4d728aec..4fadd79b 100644 --- a/batchglm/unit_test/glm_all/test_hessians_glm_all.py +++ b/batchglm/unit_test/glm_all/test_hessians_glm_all.py @@ -28,6 +28,8 @@ def simulate(self): from batchglm.api.models.glm_nb import Simulator elif self.noise_model == "norm": from batchglm.api.models.glm_norm import Simulator + elif self.noise_model == "beta2": + from batchglm.api.models.glm_beta2 import Simulator elif self.noise_model == "beta": from batchglm.api.models.glm_beta import Simulator elif self.noise_model == "bern": @@ -53,6 +55,8 @@ def get_hessians( from batchglm.api.models.glm_nb import Estimator elif self.noise_model == "norm": from batchglm.api.models.glm_norm import Estimator + elif self.noise_model == "beta2": + from batchglm.api.models.glm_beta2 import Estimator elif self.noise_model == "beta": from batchglm.api.models.glm_beta import Estimator elif self.noise_model == "bern": @@ -96,6 +100,8 @@ def _test_compute_hessians(self, sparse): from batchglm.api.models.glm_nb import Simulator, InputData elif self.noise_model == "norm": from batchglm.api.models.glm_norm import Simulator, InputData + elif self.noise_model == "beta2": + from batchglm.api.models.glm_beta2 import Simulator, InputData elif self.noise_model == "beta": from batchglm.api.models.glm_beta import Simulator, InputData elif self.noise_model == "bern": @@ -144,7 +150,7 @@ def _test_compute_hessians(self, sparse): logging.getLogger("batchglm").info("run time observation batch-wise analytic solution: %f" % t_analytic) logging.getLogger("batchglm").info("run time tensorflow solution: %f" % t_tf) logging.getLogger("batchglm").info("MAD: %f" % np.max(np.abs((h_tf - h_analytic)))) - logging.getLogger("batchglm").info("MRAD: %f" % np.max(np.abs((h_tf - h_analytic) / h_tf))) + logging.getLogger("batchglm").info("MRAD: %f" % np.max(np.abs(h_tf - h_analytic))) #i = 1 #print(h_tf[i, :, :]) @@ -154,7 +160,7 @@ def _test_compute_hessians(self, sparse): # Make sure that hessians are not all zero which might make evaluation of equality difficult. 
assert np.sum(np.abs(h_analytic)) > 1e-10, \ "hessians too small to perform test: %f" % np.sum(np.abs(h_analytic)) - mrad = np.max(np.abs((h_tf - h_analytic) / h_tf)) < 1e-12 + mrad = np.max(np.abs(h_tf - h_analytic)) assert mrad < 1e-12, mrad return True @@ -186,12 +192,25 @@ def test_compute_hessians_norm(self): return True -class Test_Hessians_GLM_BETA(Test_Hessians_GLM_ALL, unittest.TestCase): +class Test_Hessians_GLM_beta2(Test_Hessians_GLM_ALL, unittest.TestCase): + + def test_compute_hessians_beta2(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logging.getLogger("batchglm").error("Test_Hessians_GLM_beta2.test_compute_hessians_beta2()") + + self.noise_model = "beta2" + self._test_compute_hessians(sparse=False) + #self._test_compute_hessians(sparse=False) # TODO tf>=1.13 waiting for tf.sparse.expand_dims to work + + return True + +class Test_Hessians_GLM_beta(Test_Hessians_GLM_ALL, unittest.TestCase): def test_compute_hessians_beta(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) - logging.getLogger("batchglm").error("Test_Hessians_GLM_BETA.test_compute_hessians_beta()") + logging.getLogger("batchglm").error("Test_Hessians_GLM_beta.test_compute_hessians_beta()") self.noise_model = "beta" self._test_compute_hessians(sparse=False) From 1dcc8c46ea919e5e24f58786d20c3c85a549db48 Mon Sep 17 00:00:00 2001 From: ina258 Date: Sat, 30 Mar 2019 12:31:48 +0100 Subject: [PATCH 06/12] renamed beta with mean and samplesize to beta2, added new beta with common parameters p and q --- batchglm/api/models/__init__.py | 1 + batchglm/api/models/glm_beta2.py | 2 + batchglm/api/utils/random.py | 2 +- batchglm/models/glm_beta/estimator.py | 2 +- batchglm/models/glm_beta/model.py | 16 +- batchglm/models/glm_beta/simulator.py | 16 +- batchglm/models/glm_beta/utils.py | 71 ++-- batchglm/models/glm_beta2/__init__.py | 4 + batchglm/models/glm_beta2/estimator.py | 30 ++ batchglm/models/glm_beta2/external.py | 11 + batchglm/models/glm_beta2/model.py | 83 +++++ batchglm/models/glm_beta2/simulator.py | 47 +++ batchglm/models/glm_beta2/utils.py | 74 +++++ batchglm/train/tf/base_glm_all/estimator.py | 4 + .../train/tf/base_glm_all/estimator_graph.py | 6 + .../train/tf/base_glm_all/external_beta.py | 2 +- .../train/tf/base_glm_all/external_beta2.py | 6 + .../tf/base_glm_all/reducible_tensors.py | 2 + batchglm/train/tf/glm_beta/estimator.py | 129 ++++---- batchglm/train/tf/glm_beta/external.py | 2 +- batchglm/train/tf/glm_beta/fim.py | 24 +- batchglm/train/tf/glm_beta/hessians.py | 54 +-- batchglm/train/tf/glm_beta/jacobians.py | 25 +- batchglm/train/tf/glm_beta/model.py | 60 ++-- .../train/tf/glm_beta/reducible_tensors.py | 2 +- .../train/tf/glm_beta/training_strategies.py | 30 +- batchglm/train/tf/glm_beta2/__init__.py | 7 + batchglm/train/tf/glm_beta2/estimator.py | 307 ++++++++++++++++++ .../train/tf/glm_beta2/estimator_graph.py | 12 + batchglm/train/tf/glm_beta2/external.py | 20 ++ batchglm/train/tf/glm_beta2/fim.py | 25 ++ batchglm/train/tf/glm_beta2/hessians.py | 82 +++++ batchglm/train/tf/glm_beta2/jacobians.py | 42 +++ batchglm/train/tf/glm_beta2/model.py | 145 +++++++++ .../train/tf/glm_beta2/reducible_tensors.py | 13 + .../train/tf/glm_beta2/training_strategies.py | 37 +++ .../glm_all/test_acc_analytic_glm_all_2.py | 62 +++- .../unit_test/glm_all/test_graph_glm_all.py | 42 ++- .../glm_all/test_jacobians_glm_all.py | 17 + batchglm/utils/random.py | 37 ++- 40 files 
changed, 1321 insertions(+), 232 deletions(-) create mode 100644 batchglm/api/models/glm_beta2.py create mode 100644 batchglm/models/glm_beta2/__init__.py create mode 100644 batchglm/models/glm_beta2/estimator.py create mode 100644 batchglm/models/glm_beta2/external.py create mode 100644 batchglm/models/glm_beta2/model.py create mode 100644 batchglm/models/glm_beta2/simulator.py create mode 100644 batchglm/models/glm_beta2/utils.py create mode 100644 batchglm/train/tf/base_glm_all/external_beta2.py create mode 100644 batchglm/train/tf/glm_beta2/__init__.py create mode 100644 batchglm/train/tf/glm_beta2/estimator.py create mode 100644 batchglm/train/tf/glm_beta2/estimator_graph.py create mode 100644 batchglm/train/tf/glm_beta2/external.py create mode 100644 batchglm/train/tf/glm_beta2/fim.py create mode 100644 batchglm/train/tf/glm_beta2/hessians.py create mode 100644 batchglm/train/tf/glm_beta2/jacobians.py create mode 100644 batchglm/train/tf/glm_beta2/model.py create mode 100644 batchglm/train/tf/glm_beta2/reducible_tensors.py create mode 100644 batchglm/train/tf/glm_beta2/training_strategies.py diff --git a/batchglm/api/models/__init__.py b/batchglm/api/models/__init__.py index 5a3142c9..e11f0a84 100644 --- a/batchglm/api/models/__init__.py +++ b/batchglm/api/models/__init__.py @@ -1,4 +1,5 @@ from . import glm_nb from . import glm_norm +from . import glm_beta2 from . import glm_beta from . import glm_bern diff --git a/batchglm/api/models/glm_beta2.py b/batchglm/api/models/glm_beta2.py new file mode 100644 index 00000000..88d69651 --- /dev/null +++ b/batchglm/api/models/glm_beta2.py @@ -0,0 +1,2 @@ +from batchglm.models.glm_beta2 import InputData, Model, Simulator +from batchglm.train.tf.glm_beta2 import Estimator \ No newline at end of file diff --git a/batchglm/api/utils/random.py b/batchglm/api/utils/random.py index fb674530..68256f3a 100644 --- a/batchglm/api/utils/random.py +++ b/batchglm/api/utils/random.py @@ -1 +1 @@ -from batchglm.utils.random import NegativeBinomial, Normal, Beta, Bernoulli +from batchglm.utils.random import NegativeBinomial, Normal, beta2, Bernoulli, Beta diff --git a/batchglm/models/glm_beta/estimator.py b/batchglm/models/glm_beta/estimator.py index 034dfffe..5c62ca1c 100644 --- a/batchglm/models/glm_beta/estimator.py +++ b/batchglm/models/glm_beta/estimator.py @@ -7,7 +7,7 @@ class AbstractEstimator(Model, _Estimator_GLM, metaclass=abc.ABCMeta): r""" Estimator base class for generalized linear models (GLMs) with - normal noise. + negative binomial noise. """ @classmethod diff --git a/batchglm/models/glm_beta/model.py b/batchglm/models/glm_beta/model.py index 4ac37fb5..dba293f9 100644 --- a/batchglm/models/glm_beta/model.py +++ b/batchglm/models/glm_beta/model.py @@ -12,13 +12,13 @@ # Define distribution parameters: MODEL_PARAMS = MODEL_PARAMS.copy() MODEL_PARAMS.update({ - "mean": ("observations", "features"), - "samplesize": ("observations", "features"), + "mu": ("observations", "features"), + "r": ("observations", "features"), }) class Model(_Model_GLM, metaclass=abc.ABCMeta): """ - Generalized Linear Model (GLM) with beta distributed noise, logit link for location and log link for scale. + Generalized Linear Model (GLM) with negative binomial noise. 
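# Illustrative cross-check (not part of the patch) of the two beta parameterisations this
# commit separates: the reworked glm_beta uses the standard shape parameters (p, q), while
# glm_beta2 keeps the (mean, samplesize) form. They are related by
#   p = mean * samplesize,  q = (1 - mean) * samplesize,
#   mean = p / (p + q),     samplesize = p + q.
# scipy is used here only for the numeric check and is an assumption of this sketch.
import numpy as np
from scipy import stats

mean, samplesize = 0.3, 20.0
p, q = mean * samplesize, (1.0 - mean) * samplesize

assert np.isclose(p / (p + q), mean) and np.isclose(p + q, samplesize)
assert np.isclose(stats.beta.mean(p, q), mean)
assert np.isclose(stats.beta.var(p, q), mean * (1.0 - mean) / (samplesize + 1.0))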
""" @classmethod @@ -26,10 +26,10 @@ def param_shapes(cls) -> dict: return MODEL_PARAMS def link_loc(self, data): - return np.log(1/(1/data-1)) + return np.log(data) def inverse_link_loc(self, data): - return 1/(1+np.exp(-data)) + return np.exp(data) def link_scale(self, data): return np.log(data) @@ -46,15 +46,15 @@ def eta_loc(self) -> xr.DataArray: eta = np.matmul(self.design_loc.values, self.par_link_loc) if self.size_factors is not None: - assert False, "size factors not allowed" + eta += self.link_loc(np.expand_dims(self.size_factors, axis=1)) return eta @property - def mean(self) -> xr.DataArray: + def mu(self) -> xr.DataArray: return self.location @property - def samplesize(self) -> xr.DataArray: + def r(self) -> xr.DataArray: return self.scale diff --git a/batchglm/models/glm_beta/simulator.py b/batchglm/models/glm_beta/simulator.py index 7d9143d7..5ed90c40 100644 --- a/batchglm/models/glm_beta/simulator.py +++ b/batchglm/models/glm_beta/simulator.py @@ -6,8 +6,8 @@ class Simulator(_Simulator_GLM, Model): """ - Simulator for Generalized Linear Models (GLMs) with beta distributed noise. - Uses a logit-linker function for loc and a log-linker function for scale. + Simulator for Generalized Linear Models (GLMs) with negative binomial noise. + Uses the natural logarithm as linker function. """ def __init__( @@ -24,10 +24,10 @@ def __init__( def generate_params( self, - rand_fn_ave=lambda shape: np.random.uniform(0.2, 0.8, shape), - rand_fn=None, - rand_fn_loc=lambda shape: np.random.uniform(0.5, 0.6, shape), - rand_fn_scale=lambda shape: np.random.uniform(1e1, 2*1e1, shape), + rand_fn_ave=lambda shape: np.random.uniform(10, 20, shape), + rand_fn=lambda shape: np.abs(np.random.uniform(10, 20, shape)), + rand_fn_loc=None, + rand_fn_scale=None, ): self._generate_params( self, @@ -39,9 +39,9 @@ def generate_params( def generate_data(self): """ - Sample random data based on beta distribution and parameters. + Sample random data based on negative binomial distribution and parameters. """ self.data["X"] = ( self.param_shapes()["X"], - rand_utils.Beta(mean=self.mean, samplesize=self.samplesize).sample() + rand_utils.Beta(a=self.mu, b=self.r).sample() ) diff --git a/batchglm/models/glm_beta/utils.py b/batchglm/models/glm_beta/utils.py index a3c8592d..ef125783 100644 --- a/batchglm/models/glm_beta/utils.py +++ b/batchglm/models/glm_beta/utils.py @@ -1,36 +1,35 @@ -from copy import copy, deepcopy from typing import Union import numpy as np -import scipy.sparse import xarray as xr from .external import closedform_glm_mean, closedform_glm_scale -from .external import weighted_mean from .external import SparseXArrayDataArray -def closedform_beta_glm_logitmean( +def closedform_beta_glm_logmu( X: Union[xr.DataArray, SparseXArrayDataArray], design_loc, constraints_loc, + design_scale: xr.DataArray, + constraints=None, size_factors=None, - link_fn=lambda x: np.log(1/(1/x-1)), - inv_link_fn=lambda x: 1/(1+np.exp(-x)) + link_fn=np.log, + inv_link_fn=np.exp ): r""" - Calculates a closed-form solution for the `mean` parameters of beta GLMs. + Calculates a closed-form solution for the `mu` parameters of negative-binomial GLMs. :param X: The sample data :param design_loc: design matrix for location - :param constraints: tensor (all parameters x dependent parameters) + :param constraints_loc: tensor (all parameters x dependent parameters) Tensor that encodes how complete parameter set which includes dependent parameters arises from indepedent parameters: all = . 
This form of constraints is used in vector generalized linear models (VGLMs). :param size_factors: size factors for X - :return: tuple: (groupwise_means, mean, rmsd) + :return: tuple: (groupwise_means, mu, rmsd) """ - return closedform_glm_mean( + groupwise_means, mu, rmsd1 = closedform_glm_mean( X=X, dmat=design_loc, constraints=constraints_loc, @@ -39,36 +38,60 @@ def closedform_beta_glm_logitmean( inv_link_fn=inv_link_fn ) + groupwise_scale, var, rmsd2 = closedform_glm_scale( + X=X, + design_scale=design_scale, + constraints=constraints, + size_factors=size_factors, + groupwise_means=groupwise_means, + link_fn=link_fn, + compute_scales_fun=None + ) + + mu = mu / var * (mu * (1-mu) - var) + return groupwise_means, mu, rmsd1 -def closedform_beta_glm_logsamplesize( + +def closedform_beta_glm_logphi( X: Union[xr.DataArray, SparseXArrayDataArray], + design_loc, + constraints_loc, design_scale: xr.DataArray, constraints=None, size_factors=None, - groupwise_means=None, - link_fn=np.log + link_fn=np.log, + inv_link_fn=np.exp, ): r""" - Calculates a closed-form solution for the log-scale parameters of beta GLMs. + Calculates a closed-form solution for the `mu` parameters of negative-binomial GLMs. :param X: The sample data - :param design_scale: design matrix for scale - :param constraints: some design constraints + :param design_loc: design matrix for location + :param constraints_loc: tensor (all parameters x dependent parameters) + Tensor that encodes how complete parameter set which includes dependent + parameters arises from indepedent parameters: all = . + This form of constraints is used in vector generalized linear models (VGLMs). :param size_factors: size factors for X - :param groupwise_means: optional, in case if already computed this can be specified to spare double-calculation - :return: tuple (groupwise_scales, logsd, rmsd) + :return: tuple: (groupwise_means, mu, rmsd) """ + groupwise_means, mu, rmsd1 = closedform_glm_mean( + X=X, + dmat=design_loc, + constraints=constraints_loc, + size_factors=size_factors, + link_fn=link_fn, + inv_link_fn=inv_link_fn + ) - def compute_scales_fun(variance, mean): - groupwise_scales = mean*(1-mean)/variance - 1 - return groupwise_scales - - return closedform_glm_scale( + groupwise_scale, var, rmsd2 = closedform_glm_scale( X=X, design_scale=design_scale, constraints=constraints, size_factors=size_factors, groupwise_means=groupwise_means, link_fn=link_fn, - compute_scales_fun=compute_scales_fun + compute_scales_fun=None, ) + + var = (1 - mu) / var * (mu * (1 - mu) - var) + return groupwise_scale, var, rmsd2 diff --git a/batchglm/models/glm_beta2/__init__.py b/batchglm/models/glm_beta2/__init__.py new file mode 100644 index 00000000..efcf833d --- /dev/null +++ b/batchglm/models/glm_beta2/__init__.py @@ -0,0 +1,4 @@ +from .model import Model, Model_XArray +from .external import InputData +from .simulator import Simulator +from .estimator import AbstractEstimator, EstimatorStoreXArray \ No newline at end of file diff --git a/batchglm/models/glm_beta2/estimator.py b/batchglm/models/glm_beta2/estimator.py new file mode 100644 index 00000000..034dfffe --- /dev/null +++ b/batchglm/models/glm_beta2/estimator.py @@ -0,0 +1,30 @@ +import abc + +from .model import Model, Model_XArray +from .external import _Estimator_GLM, _EstimatorStore_XArray_GLM, ESTIMATOR_PARAMS + + +class AbstractEstimator(Model, _Estimator_GLM, metaclass=abc.ABCMeta): + r""" + Estimator base class for generalized linear models (GLMs) with + normal noise. 
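# Illustration (numpy only, not library code) of the moment-matching step in the
# closed-form initialisers above: mu / var * (mu*(1-mu) - var) and
# (1-mu) / var * (mu*(1-mu) - var) are the method-of-moments estimators of the beta
# shape parameters,
#   p = m * (m*(1-m)/v - 1),   q = (1-m) * (m*(1-m)/v - 1),
# for a groupwise mean m and variance v.
import numpy as np

rng = np.random.RandomState(0)
p_true, q_true = 2.0, 5.0
x = rng.beta(p_true, q_true, size=200000)

m, v = x.mean(), x.var()
common = m * (1.0 - m) / v - 1.0
print(m * common, (1.0 - m) * common)   # approximately (2, 5) for large samples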
+ """ + + @classmethod + def param_shapes(cls) -> dict: + return ESTIMATOR_PARAMS + + +class EstimatorStoreXArray(_EstimatorStore_XArray_GLM, AbstractEstimator, Model_XArray): + + def __init__(self, estim: AbstractEstimator): + input_data = estim.input_data + # to_xarray triggers the get function of these properties and thereby + # causes evaluation of the properties that have not been computed during + # training, such as the hessian. + params = estim.to_xarray( + ["a_var", "b_var", "loss", "log_likelihood", "gradients", "fisher_inv"], + coords=input_data.data + ) + + Model_XArray.__init__(self, input_data, params) \ No newline at end of file diff --git a/batchglm/models/glm_beta2/external.py b/batchglm/models/glm_beta2/external.py new file mode 100644 index 00000000..bb52b9f2 --- /dev/null +++ b/batchglm/models/glm_beta2/external.py @@ -0,0 +1,11 @@ +from batchglm.models.base import SparseXArrayDataArray, SparseXArrayDataSet +from batchglm.models.base_glm import _Estimator_GLM, _EstimatorStore_XArray_GLM, ESTIMATOR_PARAMS +from batchglm.models.base_glm import InputData, INPUT_DATA_PARAMS +from batchglm.models.base_glm import _Model_GLM, _Model_XArray_GLM, MODEL_PARAMS, _model_from_params +from batchglm.models.base_glm import _Simulator_GLM +from batchglm.models.base_glm import closedform_glm_mean, closedform_glm_scale + +import batchglm.data as data_utils +import batchglm.utils.random as rand_utils +from batchglm.utils.numeric import weighted_mean, weighted_variance +from batchglm.utils.linalg import groupwise_solve_lm \ No newline at end of file diff --git a/batchglm/models/glm_beta2/model.py b/batchglm/models/glm_beta2/model.py new file mode 100644 index 00000000..7c7d208f --- /dev/null +++ b/batchglm/models/glm_beta2/model.py @@ -0,0 +1,83 @@ +import abc +try: + import anndata +except ImportError: + anndata = None +import xarray as xr +import numpy as np + +from .external import InputData +from .external import _Model_GLM, _Model_XArray_GLM, MODEL_PARAMS, _model_from_params + +# Define distribution parameters: +MODEL_PARAMS = MODEL_PARAMS.copy() +MODEL_PARAMS.update({ + "mean": ("observations", "features"), + "samplesize": ("observations", "features"), +}) + +class Model(_Model_GLM, metaclass=abc.ABCMeta): + """ + Generalized Linear Model (GLM) with beta2 distributed noise, logit link for location and log link for scale. 
+ """ + + @classmethod + def param_shapes(cls) -> dict: + return MODEL_PARAMS + + def link_loc(self, data): + return np.log(1/(1/data-1)) + + def inverse_link_loc(self, data): + return 1/(1+np.exp(-data)) + + def link_scale(self, data): + return np.log(data) + + def inverse_link_scale(self, data): + return np.exp(data) + + @property + def eta_loc(self) -> xr.DataArray: + # TODO: take this switch out once xr.dataset slicing yields dataarray with loc_names coordinate: + if isinstance(self.par_link_loc, xr.DataArray): + eta = self.design_loc.dot(self.par_link_loc, dims="design_loc_params") + else: + eta = np.matmul(self.design_loc.values, self.par_link_loc) + + if self.size_factors is not None: + assert False, "size factors not allowed" + return eta + + @property + def mean(self) -> xr.DataArray: + return self.location + + @property + def samplesize(self) -> xr.DataArray: + return self.scale + + +def model_from_params(*args, **kwargs) -> Model: + (input_data, params) = _model_from_params(*args, **kwargs) + return Model_XArray(input_data, params) + + +class Model_XArray(_Model_XArray_GLM, Model): + _input_data: InputData + params: xr.Dataset + + def __init__(self, input_data: InputData, params: xr.Dataset): + super(_Model_XArray_GLM, self).__init__(input_data=input_data, params=params) + super(Model, self).__init__() + + def __str__(self): + return "[%s.%s object at %s]: data=%s" % ( + type(self).__module__, + type(self).__name__, + hex(id(self)), + self.params + ) + + def __repr__(self): + return self.__str__() diff --git a/batchglm/models/glm_beta2/simulator.py b/batchglm/models/glm_beta2/simulator.py new file mode 100644 index 00000000..22558cd6 --- /dev/null +++ b/batchglm/models/glm_beta2/simulator.py @@ -0,0 +1,47 @@ +import numpy as np + +from .model import Model +from .external import rand_utils, _Simulator_GLM + + +class Simulator(_Simulator_GLM, Model): + """ + Simulator for Generalized Linear Models (GLMs) with beta2 distributed noise. + Uses a logit-linker function for loc and a log-linker function for scale. + """ + + def __init__( + self, + num_observations=1000, + num_features=100 + ): + Model.__init__(self) + _Simulator_GLM.__init__( + self, + num_observations=num_observations, + num_features=num_features + ) + + def generate_params( + self, + rand_fn_ave=lambda shape: np.random.uniform(0.2, 0.8, shape), + rand_fn=None, + rand_fn_loc=lambda shape: np.random.uniform(0.5, 0.6, shape), + rand_fn_scale=lambda shape: np.random.uniform(1e1, 2*1e1, shape), + ): + self._generate_params( + self, + rand_fn_ave=rand_fn_ave, + rand_fn=rand_fn, + rand_fn_loc=rand_fn_loc, + rand_fn_scale=rand_fn_scale, + ) + + def generate_data(self): + """ + Sample random data based on beta2 distribution and parameters. 
+ """ + self.data["X"] = ( + self.param_shapes()["X"], + rand_utils.beta2(mean=self.mean, samplesize=self.samplesize).sample() + ) diff --git a/batchglm/models/glm_beta2/utils.py b/batchglm/models/glm_beta2/utils.py new file mode 100644 index 00000000..025621d9 --- /dev/null +++ b/batchglm/models/glm_beta2/utils.py @@ -0,0 +1,74 @@ +from copy import copy, deepcopy +from typing import Union + +import numpy as np +import scipy.sparse +import xarray as xr + +from .external import closedform_glm_mean, closedform_glm_scale +from .external import weighted_mean +from .external import SparseXArrayDataArray + + +def closedform_beta2_glm_logitmean( + X: Union[xr.DataArray, SparseXArrayDataArray], + design_loc, + constraints_loc, + size_factors=None, + link_fn=lambda x: np.log(1/(1/x-1)), + inv_link_fn=lambda x: 1/(1+np.exp(-x)) +): + r""" + Calculates a closed-form solution for the `mean` parameters of beta2 GLMs. + + :param X: The sample data + :param design_loc: design matrix for location + :param constraints: tensor (all parameters x dependent parameters) + Tensor that encodes how complete parameter set which includes dependent + parameters arises from indepedent parameters: all = . + This form of constraints is used in vector generalized linear models (VGLMs). + :param size_factors: size factors for X + :return: tuple: (groupwise_means, mean, rmsd) + """ + return closedform_glm_mean( + X=X, + dmat=design_loc, + constraints=constraints_loc, + size_factors=size_factors, + link_fn=link_fn, + inv_link_fn=inv_link_fn + ) + + +def closedform_beta2_glm_logsamplesize( + X: Union[xr.DataArray, SparseXArrayDataArray], + design_scale: xr.DataArray, + constraints=None, + size_factors=None, + groupwise_means=None, + link_fn=np.log +): + r""" + Calculates a closed-form solution for the log-scale parameters of beta2 GLMs. 
+ + :param X: The sample data + :param design_scale: design matrix for scale + :param constraints: some design constraints + :param size_factors: size factors for X + :param groupwise_means: optional, in case if already computed this can be specified to spare double-calculation + :return: tuple (groupwise_scales, logsd, rmsd) + """ + + def compute_scales_fun(variance, mean): + groupwise_scales = mean*(1-mean)/variance - 1 + return groupwise_scales + + return closedform_glm_scale( + X=X, + design_scale=design_scale, + constraints=constraints, + size_factors=size_factors, + groupwise_means=groupwise_means, + link_fn=link_fn, + compute_scales_fun=compute_scales_fun + ) diff --git a/batchglm/train/tf/base_glm_all/estimator.py b/batchglm/train/tf/base_glm_all/estimator.py index 3b21219f..51ca565e 100644 --- a/batchglm/train/tf/base_glm_all/estimator.py +++ b/batchglm/train/tf/base_glm_all/estimator.py @@ -71,6 +71,8 @@ def __init__( from .external_nb import EstimatorGraph elif noise_model == "norm": from .external_norm import EstimatorGraph + elif noise_model == "beta2": + from .external_beta2 import EstimatorGraph elif noise_model == "beta": from .external_beta import EstimatorGraph elif noise_model == "bern": @@ -356,6 +358,8 @@ def finalize(self): from .external_nb import EstimatorStoreXArray elif self.noise_model == "norm": from .external_norm import EstimatorStoreXArray + elif self.noise_model == "beta2": + from .external_beta2 import EstimatorStoreXArray elif self.noise_model == "beta": from .external_beta import EstimatorStoreXArray elif self.noise_model == "bern": diff --git a/batchglm/train/tf/base_glm_all/estimator_graph.py b/batchglm/train/tf/base_glm_all/estimator_graph.py index 9b360cd7..52f36f7c 100644 --- a/batchglm/train/tf/base_glm_all/estimator_graph.py +++ b/batchglm/train/tf/base_glm_all/estimator_graph.py @@ -64,6 +64,8 @@ def __init__( from .external_nb import ReducibleTensors elif noise_model == "norm": from .external_norm import ReducibleTensors + elif noise_model == "beta2": + from .external_beta2 import ReducibleTensors elif noise_model == "beta": from .external_beta import ReducibleTensors elif noise_model == "bern": @@ -252,6 +254,8 @@ def __init__( from .external_nb import ReducibleTensors elif noise_model == "norm": from .external_norm import ReducibleTensors + elif noise_model == "beta2": + from .external_beta2 import ReducibleTensors elif noise_model == "beta": from .external_beta import ReducibleTensors elif noise_model == "bern": @@ -435,6 +439,8 @@ def __init__( from .external_nb import ModelVars elif noise_model == "norm": from .external_norm import ModelVars + elif noise_model == "beta2": + from .external_beta2 import ModelVars elif noise_model == "beta": from .external_beta import ModelVars elif noise_model == "bern": diff --git a/batchglm/train/tf/base_glm_all/external_beta.py b/batchglm/train/tf/base_glm_all/external_beta.py index 466cd3d0..15eb60f2 100644 --- a/batchglm/train/tf/base_glm_all/external_beta.py +++ b/batchglm/train/tf/base_glm_all/external_beta.py @@ -3,4 +3,4 @@ from batchglm.train.tf.glm_beta import Hessians, FIM, Jacobians, ReducibleTensors from batchglm.models.glm_beta import AbstractEstimator, EstimatorStoreXArray, InputData, Model -from batchglm.models.glm_beta.utils import closedform_beta_glm_logitmean, closedform_beta_glm_logsamplesize \ No newline at end of file +from batchglm.models.glm_beta.utils import closedform_beta_glm_logmu, closedform_beta_glm_logphi \ No newline at end of file diff --git 
a/batchglm/train/tf/base_glm_all/external_beta2.py b/batchglm/train/tf/base_glm_all/external_beta2.py new file mode 100644 index 00000000..efdcbb2f --- /dev/null +++ b/batchglm/train/tf/base_glm_all/external_beta2.py @@ -0,0 +1,6 @@ +from batchglm.train.tf.glm_beta2 import EstimatorGraph +from batchglm.train.tf.glm_beta2 import BasicModelGraph, ModelVars, ProcessModel +from batchglm.train.tf.glm_beta2 import Hessians, FIM, Jacobians, ReducibleTensors + +from batchglm.models.glm_beta2 import AbstractEstimator, EstimatorStoreXArray, InputData, Model +from batchglm.models.glm_beta2.utils import closedform_beta2_glm_logitmean, closedform_beta2_glm_logsamplesize \ No newline at end of file diff --git a/batchglm/train/tf/base_glm_all/reducible_tensors.py b/batchglm/train/tf/base_glm_all/reducible_tensors.py index dbe20689..558c6870 100644 --- a/batchglm/train/tf/base_glm_all/reducible_tensors.py +++ b/batchglm/train/tf/base_glm_all/reducible_tensors.py @@ -35,6 +35,8 @@ def assemble_tensors(self, idx, data): from .external_nb import BasicModelGraph elif self.noise_model == "norm": from .external_norm import BasicModelGraph + elif self.noise_model == "beta2": + from .external_beta2 import BasicModelGraph elif self.noise_model == "beta": from .external_beta import BasicModelGraph elif self.noise_model == "bern": diff --git a/batchglm/train/tf/glm_beta/estimator.py b/batchglm/train/tf/glm_beta/estimator.py index 3a9563ef..1f230534 100644 --- a/batchglm/train/tf/glm_beta/estimator.py +++ b/batchglm/train/tf/glm_beta/estimator.py @@ -5,18 +5,19 @@ import tensorflow as tf from .external import AbstractEstimator, EstimatorAll, ESTIMATOR_PARAMS, InputData, Model -from .external import closedform_beta_glm_logitmean, closedform_beta_glm_logsamplesize +from .external import closedform_beta_glm_logmu, closedform_beta_glm_logphi from .external import SparseXArrayDataArray from .estimator_graph import EstimatorGraph from .model import ProcessModel from .training_strategies import TrainingStrategies +logger = logging.getLogger("batchglm") class Estimator(EstimatorAll, AbstractEstimator, ProcessModel): """ - Estimator for Generalized Linear Models (GLMs) with beta distributed noise. - Uses a logit linker function for loc and log linker function for scale. + Estimator for Generalized Linear Models (GLMs) with negative binomial noise. + Uses the natural logarithm as linker function. 
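# Illustration (numpy only, not library code) of the compute_scales_fun used in the beta2
# utils above: a beta variable with mean m and samplesize s has variance
# v = m*(1-m)/(s+1), so the moment estimator of the samplesize is
#   s = m*(1-m)/v - 1.
import numpy as np

rng = np.random.RandomState(1)
mean_true, samplesize_true = 0.25, 30.0
x = rng.beta(mean_true * samplesize_true, (1.0 - mean_true) * samplesize_true, size=200000)

m, v = x.mean(), x.var()
print(m * (1.0 - m) / v - 1.0)   # approximately 30 for large samples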
""" def __init__( @@ -25,8 +26,8 @@ def __init__( batch_size: int = 500, graph: tf.Graph = None, init_model: Model = None, - init_a: Union[np.ndarray, str] = "AUTO", - init_b: Union[np.ndarray, str] = "AUTO", + init_a: Union[np.ndarray, str] = "closed_form", + init_b: Union[np.ndarray, str] = "closed_form", quick_scale: bool = False, model: EstimatorGraph = None, provide_optimizers: dict = { @@ -36,10 +37,10 @@ def __init__( "rmsprop": True, "nr": True, "nr_tr": True, - "irls": False, - "irls_gd": False, - "irls_tr": False, - "irls_gd_tr": False, + "irls": True, + "irls_gd": True, + "irls_tr": True, + "irls_gd_tr": True, }, provide_batched: bool = False, provide_fim: bool = False, @@ -105,7 +106,7 @@ def __init__( self._input_data = input_data self._train_loc = True - self._train_scale = not quick_scale + self._train_scale = True (init_a, init_b) = self.init_par( input_data=input_data, @@ -115,16 +116,13 @@ def __init__( ) init_a = init_a.astype(dtype) init_b = init_b.astype(dtype) - - print("init_a") - print(init_a) - print("init_b") - print(init_b) + if quick_scale: + self._train_scale = False if len(optim_algos) > 0: if np.any([x.lower() in ["nr", "nr_tr"] for x in optim_algos]): provide_hessian = True - if np.any([x.lower() in ["irls", "irls_tr"] for x in optim_algos]): + if np.any([x.lower() in ["irls", "irls_tr", "irls_gd", "irls_gd_tr"] for x in optim_algos]): provide_fim = True EstimatorAll.__init__( @@ -173,6 +171,12 @@ def init_par( """ size_factors_init = input_data.size_factors + if size_factors_init is not None: + size_factors_init = np.expand_dims(size_factors_init, axis=1) + size_factors_init = np.broadcast_to( + array=size_factors_init, + shape=[input_data.num_observations, input_data.num_features] + ) if init_model is None: groupwise_means = None @@ -184,59 +188,66 @@ def init_par( init_a = "closed_form" if init_a.lower() == "closed_form": - groupwise_means, init_a, rmsd_a = closedform_beta_glm_logitmean( + groupwise_means, init_a, rmsd_a = closedform_beta_glm_logmu( X=input_data.X, design_loc=input_data.design_loc, constraints_loc=input_data.constraints_loc.values, + design_scale=input_data.design_scale, + constraints=input_data.constraints_scale.values, size_factors=size_factors_init, - link_fn=lambda mean: np.log( - 1/(1/self.np_clip_param(mean, "mean")-1) - ) + link_fn=lambda mu: np.log(self.np_clip_param(mu, "mu")) ) # train mu, if the closed-form solution is inaccurate self._train_loc = not (np.all(rmsd_a == 0) or rmsd_a.size == 0) - - logging.getLogger("batchglm").debug("Using closed-form MME initialization for mean") - elif init_a.lower() == "standard": - if isinstance(input_data.X, SparseXArrayDataArray): - overall_means = input_data.X.mean(dim="observations") - else: - overall_means = input_data.X.mean(dim="observations").values # directly calculate the mean - overall_means = self.np_clip_param(overall_means, "mean") - - init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) - init_a[0, :] = np.log(overall_means/(1-overall_means)) - self._train_loc = True - - logging.getLogger("batchglm").debug("Using standard initialization for mean") + if input_data.size_factors is not None: + if np.any(input_data.size_factors != 1): + self._train_loc = True + + logger.debug("Using closed-form MLE initialization for mean") + logger.debug("Should train mu: %s", self._train_loc) + # elif init_a.lower() == "standard": + # if isinstance(input_data.X, SparseXArrayDataArray): + # overall_means = input_data.X.mean(dim="observations") + # else: + # overall_means = 
input_data.X.mean(dim="observations").values # directly calculate the mean + # overall_means = self.np_clip_param(overall_means, "mu") + # + # init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) + # init_a[0, :] = np.log(overall_means) + # self._train_loc = True + # + # logger.debug("Using standard initialization for mean") + # logger.debug("Should train mu: %s", self._train_loc) elif init_a.lower() == "all_zero": init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) self._train_loc = True - logging.getLogger("batchglm").debug("Using all zero initialization for mean") + logger.debug("Using all_zero initialization for mean") + logger.debug("Should train mu: %s", self._train_loc) else: raise ValueError("init_a string %s not recognized" % init_a) - logging.getLogger("batchglm").debug("Should train mean: %s", self._train_loc) + if isinstance(init_b, str): if init_b.lower() == "auto": init_b = "standard" - if init_b.lower() == "standard": - groupwise_scales, init_b_intercept, rmsd_b = closedform_beta_glm_logsamplesize( - X=input_data.X, - design_scale=input_data.design_scale[:, [0]], - constraints=input_data.constraints_scale[[0], [0]].values, - size_factors=size_factors_init, - groupwise_means=None, - link_fn=lambda samplesize: np.log(self.np_clip_param(samplesize, "samplesize")) - ) - init_b = np.zeros([input_data.num_scale_params, input_data.X.shape[1]]) - init_b[0, :] = init_b_intercept - - logging.getLogger("batchglm").debug("Using standard-form MME initialization for dispersion") - elif init_b.lower() == "closed_form": + # if init_b.lower() == "standard": + # groupwise_scales, init_b_intercept, rmsd_b = closedform_beta_glm_logphi( + # X=input_data.X, + # design_scale=input_data.design_scale[:, [0]], + # constraints=input_data.constraints_scale[[0], [0]].values, + # size_factors=size_factors_init, + # groupwise_means=None, + # link_fn=lambda r: np.log(self.np_clip_param(r, "r")) + # ) + # init_b = np.zeros([input_data.num_scale_params, input_data.X.shape[1]]) + # init_b[0, :] = init_b_intercept + # + # logger.debug("Using standard-form MME initialization for dispersion") + # logger.debug("Should train r: %s", self._train_scale) + if init_b.lower() == "closed_form": dmats_unequal = False if input_data.design_loc.shape[1] == input_data.design_scale.shape[1]: if np.any(input_data.design_loc.values != input_data.design_scale.values): @@ -251,23 +262,25 @@ def init_par( raise ValueError("cannot use closed_form init for scale model " + "if scale model differs from loc model") - groupwise_scales, init_b, rmsd_b = closedform_beta_glm_logsamplesize( + groupwise_scales, init_b, rmsd_b = closedform_beta_glm_logphi( X=input_data.X, + design_loc=input_data.design_loc, + constraints_loc=input_data.constraints_loc.values, design_scale=input_data.design_scale, constraints=input_data.constraints_scale.values, size_factors=size_factors_init, - groupwise_means=groupwise_means, - link_fn=lambda samplesize: np.log(self.np_clip_param(samplesize, "samplesize")) + link_fn=lambda r: np.log(self.np_clip_param(r, "r")) ) - logging.getLogger("batchglm").debug("Using closed-form MME initialization for dispersion") + logger.debug("Using closed-form MME initialization for dispersion") + logger.debug("Should train r: %s", self._train_scale) elif init_b.lower() == "all_zero": init_b = np.zeros([input_data.num_scale_params, input_data.X.shape[1]]) - logging.getLogger("batchglm").debug("Using standard initialization for dispersion") + logger.debug("Using standard initialization for 
dispersion") + logger.debug("Should train r: %s", self._train_scale) else: raise ValueError("init_b string %s not recognized" % init_b) - logging.getLogger("batchglm").debug("Should train r: %s", self._train_scale) else: # Locations model: if isinstance(init_a, str) and (init_a.lower() == "auto" or init_a.lower() == "init_model"): @@ -281,7 +294,7 @@ def init_par( init_loc[my_idx] = init_model.a_var[init_idx] init_a = init_loc - logging.getLogger("batchglm").debug("Using initialization based on input model for mean") + logger.debug("Using initialization based on input model for mean") # Scale model: if isinstance(init_b, str) and (init_b.lower() == "auto" or init_b.lower() == "init_model"): @@ -295,7 +308,7 @@ def init_par( init_scale[my_idx] = init_model.b_var[init_idx] init_b = init_scale - logging.getLogger("batchglm").debug("Using initialization based on input model for dispersion") + logger.debug("Using initialization based on input model for dispersion") return init_a, init_b diff --git a/batchglm/train/tf/glm_beta/external.py b/batchglm/train/tf/glm_beta/external.py index 78c25640..2e3f2add 100644 --- a/batchglm/train/tf/glm_beta/external.py +++ b/batchglm/train/tf/glm_beta/external.py @@ -3,7 +3,7 @@ from batchglm.models.base.input import SparseXArrayDataSet, SparseXArrayDataArray from batchglm.models.glm_beta import AbstractEstimator, EstimatorStoreXArray, InputData, Model from batchglm.models.base_glm.utils import closedform_glm_mean, closedform_glm_scale -from batchglm.models.glm_beta.utils import closedform_beta_glm_logitmean, closedform_beta_glm_logsamplesize +from batchglm.models.glm_beta.utils import closedform_beta_glm_logmu, closedform_beta_glm_logphi import batchglm.train.tf.ops as op_utils import batchglm.train.tf.train as train_utils diff --git a/batchglm/train/tf/glm_beta/fim.py b/batchglm/train/tf/glm_beta/fim.py index 69817fbe..c2cf1301 100644 --- a/batchglm/train/tf/glm_beta/fim.py +++ b/batchglm/train/tf/glm_beta/fim.py @@ -8,18 +8,36 @@ class FIM(FIMGLMALL): - # No Fisher Information Matrices due to unsolvable E[log(X)] def _weight_fim_aa( self, loc, scale ): - assert False, "not implemented" + scalar_one = tf.constant(1, shape=(), dtype=self.dtype) + const = loc * (tf.digamma(loc + scale) - tf.digamma(loc) + loc * (tf.polygamma(scalar_one, loc + scale) - tf.polygamma(scalar_one, loc))) + const2 = const + loc * loc / (loc + scale) + + return const2 def _weight_fim_bb( self, loc, scale ): - assert False, "not implemented" \ No newline at end of file + scalar_one = tf.constant(1, shape=(), dtype=self.dtype) + const = scale * (tf.digamma(loc + scale) - tf.digamma(scale) + scale * ( + tf.polygamma(scalar_one, loc + scale) - tf.polygamma(scalar_one, scale))) + const2 = const + scale * scale / (loc + scale) + + return const2 + + def _weight_fim_ab( + self, + loc, + scale + ): + scalar_one = tf.constant(1, shape=(), dtype=self.dtype) + const = tf.polygamma(scalar_one, loc + scale) * loc * scale + + return const \ No newline at end of file diff --git a/batchglm/train/tf/glm_beta/hessians.py b/batchglm/train/tf/glm_beta/hessians.py index d4bbb165..a3419914 100644 --- a/batchglm/train/tf/glm_beta/hessians.py +++ b/batchglm/train/tf/glm_beta/hessians.py @@ -9,49 +9,32 @@ class Hessians(HessianGLMALL): - def _weight_hessian_aa( + def _weight_hessian_ab( self, X, loc, scale, ): - one_minus_loc = 1 - loc - loc_times_scale = loc * scale - one_minus_loc_times_scale = one_minus_loc * scale scalar_one = tf.constant(1, shape=(), dtype=self.dtype) + const = tf.polygamma(scalar_one, 
loc + scale) * loc * scale - if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - const1 = tf.log(tf.sparse.to_dense(X) / -tf.sparse.add(X, -1)) - else: - const1 = tf.log(X / (1 - X)) - - const2 = (1 - 2 * loc) * (- tf.digamma(loc_times_scale) + tf.digamma(one_minus_loc_times_scale) + const1) - const3 = loc * one_minus_loc_times_scale * (- tf.polygamma(scalar_one, loc_times_scale) - tf.polygamma(scalar_one, one_minus_loc_times_scale)) - const = loc * one_minus_loc_times_scale * (const2 + const3) return const - def _weight_hessian_ab( + def _weight_hessian_aa( self, X, loc, scale, ): - one_minus_loc = 1 - loc - loc_times_scale = loc * scale - one_minus_loc_times_scale = one_minus_loc * scale scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - + const = loc * (tf.digamma(loc+scale) - tf.digamma(loc) + loc*(tf.polygamma(scalar_one, loc+scale) - tf.polygamma(scalar_one, loc))) if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - const1 = tf.log(tf.sparse.to_dense(X) / -tf.sparse.add(X, -1)) + const1 = X.__mul__(loc) + const2 = tf.sparse.add(const1, const) else: - const1 = tf.log(X / (1 - X)) + const2 = const + X * loc - const2 = - tf.digamma(loc_times_scale) + tf.digamma(one_minus_loc_times_scale) + const1 - const3 = scale * (- tf.polygamma(scalar_one, loc_times_scale) * loc + one_minus_loc * tf.polygamma(scalar_one, one_minus_loc_times_scale)) - - const = loc * one_minus_loc_times_scale * (const2 + const3) - - return const + return const2 def _weight_hessian_bb( self, @@ -59,24 +42,13 @@ def _weight_hessian_bb( loc, scale, ): - one_minus_loc = 1 - loc - loc_times_scale = loc * scale - one_minus_loc_times_scale = one_minus_loc * scale scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - + const = scale * (tf.digamma(loc+scale) - tf.digamma(scale) + scale*(tf.polygamma(scalar_one, loc+scale) - tf.polygamma(scalar_one, scale))) if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - const1 = tf.log(tf.sparse.to_dense(X) / -tf.sparse.add(X, -1)) + const1 = X.__mul__(scale) + const2 = tf.sparse.add(const1, const) else: - const1 = tf.log(X / (1 - X)) - - const2 = loc * (tf.log(X) - tf.digamma(loc_times_scale))\ - - one_minus_loc * (tf.digamma(one_minus_loc_times_scale) + tf.log(const1)) \ - + tf.digamma(scale) - const3 = scale * (- tf.square(loc) * tf.polygamma(scalar_one, loc_times_scale)\ - + tf.polygamma(scalar_one, scale)\ - - tf.polygamma(scalar_one, one_minus_loc_times_scale) * tf.square(one_minus_loc)) - const = scale * (const2 + const3) - - return const + const2 = const + X * scale + return const2 diff --git a/batchglm/train/tf/glm_beta/jacobians.py b/batchglm/train/tf/glm_beta/jacobians.py index 1d97149d..51d0d898 100644 --- a/batchglm/train/tf/glm_beta/jacobians.py +++ b/batchglm/train/tf/glm_beta/jacobians.py @@ -15,14 +15,13 @@ def _weights_jac_a( loc, scale, ): - one_minus_loc = 1 - loc if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - const1 = tf.log(tf.sparse.to_dense(X)/-tf.sparse.add(X, -1)) + const = tf.sparse.add(X, tf.digamma(loc+scale) - tf.digamma(loc)) else: - const1 = tf.log(X/(1-X)) - const2 = - tf.digamma(loc*scale) + tf.digamma(one_minus_loc*scale) + const1 - const = const2 * scale * loc * one_minus_loc - return const + const = tf.digamma(loc+scale) - tf.digamma(loc) + X + const1 = const * loc + + return const1 def _weights_jac_b( self, @@ -30,13 +29,11 @@ def _weights_jac_b( loc, scale, ): + # Pre-define sub-graphs that are used multiple times: if isinstance(X, 
tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - one_minus_X = - tf.sparse.add(X, -1) - Xdense = tf.sparse.to_dense(X) + const = - tf.sparse_add(X, - tf.digamma(loc+scale) + tf.digamma(scale) -tf.ones(shape=X.dense_shape, dtype=self.dtype)) else: - one_minus_X = 1 - X - Xdense = X - one_minus_loc = 1 - loc - const = scale * (tf.digamma(scale) - tf.digamma(loc*scale)*loc - tf.digamma(one_minus_loc*scale)*one_minus_loc\ - + loc * tf.log(Xdense) + one_minus_loc * tf.log(one_minus_X)) - return const + const = tf.digamma(loc+scale) - tf.digamma(scale) + tf.ones_like(X) - X + const1 = const * scale + + return const1 diff --git a/batchglm/train/tf/glm_beta/model.py b/batchglm/train/tf/glm_beta/model.py index ed9c9631..69647650 100644 --- a/batchglm/train/tf/glm_beta/model.py +++ b/batchglm/train/tf/glm_beta/model.py @@ -26,27 +26,24 @@ def param_bounds( dmax = np.finfo(dtype).max dtype = dtype.type - zero = np.nextafter(0, np.inf, dtype=dtype) - one = np.nextafter(1, -np.inf, dtype=dtype) - sf = dtype(pkg_constants.ACCURACY_MARGIN_RELATIVE_TO_LIMIT) bounds_min = { - "a_var": np.log(zero/(1-zero)) / sf, - "b_var": np.log(zero) / sf, - "eta_loc": np.log(zero/(1-zero)) / sf, - "eta_scale": np.log(zero) / sf, - "mean": np.nextafter(0, np.inf, dtype=dtype), - "samplesize": np.nextafter(0, np.inf, dtype=dtype), + "a_var": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, + "b_var": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, + "eta_loc": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, + "eta_scale": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, + "mu": np.nextafter(0, np.inf, dtype=dtype), + "r": np.nextafter(0, np.inf, dtype=dtype), "probs": dtype(0), - "log_probs": np.log(zero), + "log_probs": np.log(np.nextafter(0, np.inf, dtype=dtype)), } bounds_max = { - "a_var": np.log(one/(1-one)) / sf, + "a_var": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, "b_var": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, - "eta_loc": np.log(one/(1-one)) / sf, + "eta_loc": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, - "mean": one, - "samplesize": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, + "mu": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, + "r": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, "probs": dtype(1), "log_probs": dtype(0), } @@ -81,6 +78,9 @@ def __init__( else: eta_loc = tf.matmul(design_loc, a_var) + if size_factors is not None: + eta_loc = tf.add(eta_loc, tf.log(size_factors)) + eta_loc = self.tf_clip_param(eta_loc, "eta_loc") if constraints_scale is not None: @@ -91,33 +91,25 @@ def __init__( eta_scale = self.tf_clip_param(eta_scale, "eta_scale") # Inverse linker functions: - model_loc = 1/(1+tf.exp(-eta_loc)) + model_loc = tf.exp(eta_loc) model_scale = tf.exp(eta_scale) # Log-likelihood: + log_r_plus_mu = tf.log(model_scale + model_loc) if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - one_minus_X = -tf.sparse.add(X, -1) - Xdense = tf.sparse.to_dense(X) + Xdense = tf.sparse_to_dense(X) + one_minus_X = - tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype)) else: - one_minus_X = 1 - X Xdense = X + one_minus_X = tf.ones_like(X)-X + + log_probs = tf.lgamma(model_loc+model_scale) - tf.lgamma(model_loc) - tf.lgamma(model_scale)\ + + (model_loc - tf.ones_like(model_loc)) * Xdense + (model_scale - tf.ones_like(model_scale)) * one_minus_X - one_minus_loc = 1 - model_loc - log_probs = tf.lgamma(model_scale) - tf.lgamma(model_loc * model_scale)\ - - 
tf.lgamma(one_minus_loc * model_scale)\ - + (model_scale * model_loc - 1) * tf.log(Xdense)\ - + (one_minus_loc * model_scale - 1) * tf.log(one_minus_X) - a = tf.print("log_probs: \n", log_probs) - b = tf.print("model_loc: \n", model_loc) - c = tf.print("model_scale: \n", model_scale) - d = tf.print("X: \n", X) - e = tf.print("a_var: \n", a_var) - f = tf.print("eta_loc: \n", eta_loc) - with tf.control_dependencies([a, b, c, d, e, f]): - log_probs = self.tf_clip_param(log_probs, "log_probs") + log_probs = self.tf_clip_param(log_probs, "log_probs") # Variance: - sigma2 = (model_loc * one_minus_loc) / (1 + model_scale) + sigma2 = (model_loc * model_scale) / ((model_loc + model_scale)**2 * (model_loc + model_scale + tf.ones_like(model_scale))) self.X = X self.design_loc = design_loc @@ -133,8 +125,8 @@ def __init__( self.eta_scale = eta_scale self.model_loc = model_loc self.model_scale = model_scale - self.mean = model_loc - self.samplesize = model_scale + self.mu = model_loc + self.r = model_scale self.log_probs = log_probs diff --git a/batchglm/train/tf/glm_beta/reducible_tensors.py b/batchglm/train/tf/glm_beta/reducible_tensors.py index a89103ea..862ccaf8 100644 --- a/batchglm/train/tf/glm_beta/reducible_tensors.py +++ b/batchglm/train/tf/glm_beta/reducible_tensors.py @@ -5,7 +5,7 @@ from .jacobians import Jacobians from .fim import FIM -logger = logging.getLogger(__name__) +logger = logging.getLogger("batchglm") class ReducibleTensors(Jacobians, Hessians, FIM, ReducableTensorsGLMALL): diff --git a/batchglm/train/tf/glm_beta/training_strategies.py b/batchglm/train/tf/glm_beta/training_strategies.py index 9bd8b271..d9e57377 100644 --- a/batchglm/train/tf/glm_beta/training_strategies.py +++ b/batchglm/train/tf/glm_beta/training_strategies.py @@ -1,37 +1,27 @@ from enum import Enum + class TrainingStrategies(Enum): AUTO = None DEFAULT = [ { - "convergence_criteria": "all_converged_ll", - "stopping_criteria": 1e-8, + "convergence_criteria": "all_converged", "use_batching": False, - "optim_algo": "nr_tr", + "optim_algo": "irls_gd_tr", }, ] - INEXACT = [ + IRLS = [ { - "convergence_criteria": "all_converged_ll", - "stopping_criteria": 1e-6, + "convergence_criteria": "all_converged", "use_batching": False, - "optim_algo": "nr_tr", + "optim_algo": "irls_gd_tr", }, ] - EXACT = [ + IRLS_BATCHED = [ { - "convergence_criteria": "all_converged_ll", - "stopping_criteria": 1e-8, - "use_batching": False, - "optim_algo": "nr_tr", + "convergence_criteria": "all_converged", + "use_batching": True, + "optim_algo": "irls_gd_tr", }, ] - IRLS = [ - { - "convergence_criteria": "all_converged_ll", - "stopping_criteria": 1e-8, - "use_batching": False, - "optim_algo": "irls_tr", - }, - ] \ No newline at end of file diff --git a/batchglm/train/tf/glm_beta2/__init__.py b/batchglm/train/tf/glm_beta2/__init__.py new file mode 100644 index 00000000..4db081bb --- /dev/null +++ b/batchglm/train/tf/glm_beta2/__init__.py @@ -0,0 +1,7 @@ +from .estimator import Estimator +from .estimator_graph import EstimatorGraph +from .model import BasicModelGraph, ModelVars, ProcessModel +from .hessians import Hessians +from .fim import FIM +from .jacobians import Jacobians +from .reducible_tensors import ReducibleTensors diff --git a/batchglm/train/tf/glm_beta2/estimator.py b/batchglm/train/tf/glm_beta2/estimator.py new file mode 100644 index 00000000..668bd421 --- /dev/null +++ b/batchglm/train/tf/glm_beta2/estimator.py @@ -0,0 +1,307 @@ +import logging +from typing import Union + +import numpy as np +import tensorflow as tf + +from 
.external import AbstractEstimator, EstimatorAll, ESTIMATOR_PARAMS, InputData, Model +from .external import closedform_beta2_glm_logitmean, closedform_beta2_glm_logsamplesize +from .external import SparseXArrayDataArray +from .estimator_graph import EstimatorGraph +from .model import ProcessModel +from .training_strategies import TrainingStrategies + +logger = logging.getLogger("batchglm") + + +class Estimator(EstimatorAll, AbstractEstimator, ProcessModel): + """ + Estimator for Generalized Linear Models (GLMs) with beta2 distributed noise. + Uses a logit linker function for loc and log linker function for scale. + """ + + def __init__( + self, + input_data: InputData, + batch_size: int = 500, + graph: tf.Graph = None, + init_model: Model = None, + init_a: Union[np.ndarray, str] = "AUTO", + init_b: Union[np.ndarray, str] = "AUTO", + quick_scale: bool = False, + model: EstimatorGraph = None, + provide_optimizers: dict = { + "gd": True, + "adam": True, + "adagrad": True, + "rmsprop": True, + "nr": True, + "nr_tr": True, + "irls": False, + "irls_gd": False, + "irls_tr": False, + "irls_gd_tr": False, + }, + provide_batched: bool = False, + provide_fim: bool = False, + provide_hessian: bool = False, + optim_algos: list = [], + extended_summary=False, + dtype="float64" + ): + """ + Performs initialisation and creates a new estimator. + + :param input_data: InputData + The input data + :param batch_size: int + Size of mini-batches used. + :param graph: (optional) tf.Graph + :param init_model: (optional) + If provided, this model will be used to initialize this Estimator. + :param init_a: (Optional) + Low-level initial values for a. Can be: + + - str: + * "auto": automatically choose best initialization + * "random": initialize with random values + * "standard": initialize intercept with observed mean + * "init_model": initialize with another model (see `ìnit_model` parameter) + * "closed_form": try to initialize with closed form + - np.ndarray: direct initialization of 'a' + :param init_b: (Optional) + Low-level initial values for b. Can be: + + - str: + * "auto": automatically choose best initialization + * "random": initialize with random values + * "standard": initialize with zeros + * "init_model": initialize with another model (see `ìnit_model` parameter) + * "closed_form": try to initialize with closed form + - np.ndarray: direct initialization of 'b' + :param quick_scale: bool + Whether `scale` will be fitted faster and maybe less accurate. + Useful in scenarios where fitting the exact `scale` is not absolutely necessary. + :param model: EstimatorGraph + EstimatorGraph to use. Basically for debugging. + :param provide_optimizers: + + E.g. {"gd": False, "adam": False, "adagrad": False, "rmsprop": False, + "nr": False, "nr_tr": True, + "irls": False, "irls_gd": False, "irls_tr": False, "irls_gd_tr": False} + :param provide_batched: bool + Whether mini-batched optimizers should be provided. + :param provide_fim: Whether to compute fisher information matrix during training + Either supply provide_fim and provide_hessian or optim_algos. + :param provide_hessian: Whether to compute hessians during training + Either supply provide_fim and provide_hessian or optim_algos. + :param optim_algos: Algorithms that you want to use on this object. Depending on that, + the hessian and/or fisher information matrix are computed. + Either supply provide_fim and provide_hessian or optim_algos. + :param extended_summary: Include detailed information in the summaries. 
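# Hypothetical, simplified illustration of the provide_hessian / provide_fim / optim_algos
# interplay described in the docstring above; the exact set of optimiser names checked
# differs slightly between the estimators in this patch.
def flags_from_optim_algos(optim_algos):
    provide_hessian = any(x.lower() in ("nr", "nr_tr") for x in optim_algos)
    provide_fim = any(x.lower() in ("irls", "irls_gd", "irls_tr", "irls_gd_tr")
                      for x in optim_algos)
    return provide_hessian, provide_fim

print(flags_from_optim_algos(["nr_tr"]))       # (True, False)
print(flags_from_optim_algos(["irls_gd_tr"]))  # (False, True)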
+ Will increase runtime of summary writer, use only for debugging. + :param dtype: Precision used in tensorflow. + """ + self.TrainingStrategies = TrainingStrategies + + self._input_data = input_data + self._train_loc = True + self._train_scale = True + + (init_a, init_b) = self.init_par( + input_data=input_data, + init_a=init_a, + init_b=init_b, + init_model=init_model + ) + init_a = init_a.astype(dtype) + init_b = init_b.astype(dtype) + if quick_scale: + self._train_scale = False + + print("init_a") + print(init_a) + print("init_b") + print(init_b) + + if len(optim_algos) > 0: + if np.any([x.lower() in ["nr", "nr_tr"] for x in optim_algos]): + provide_hessian = True + if np.any([x.lower() in ["irls", "irls_tr"] for x in optim_algos]): + provide_fim = True + + EstimatorAll.__init__( + self=self, + input_data=input_data, + batch_size=batch_size, + graph=graph, + init_a=init_a, + init_b=init_b, + model=model, + provide_optimizers=provide_optimizers, + provide_batched=provide_batched, + provide_fim=provide_fim, + provide_hessian=provide_hessian, + extended_summary=extended_summary, + noise_model="beta2", + dtype=dtype + ) + + @classmethod + def param_shapes(cls) -> dict: + return ESTIMATOR_PARAMS + + def init_par( + self, + input_data, + init_a, + init_b, + init_model + ): + r""" + standard: + Only initialise intercept and keep other coefficients as zero. + + closed-form: + Initialize with Maximum Likelihood / Maximum of Momentum estimators + + Idea: + $$ + \theta &= f(x) \\ + \Rightarrow f^{-1}(\theta) &= x \\ + &= (D \cdot D^{+}) \cdot x \\ + &= D \cdot (D^{+} \cdot x) \\ + &= D \cdot x' = f^{-1}(\theta) + $$ + """ + + size_factors_init = input_data.size_factors + + if init_model is None: + groupwise_means = None + init_a_str = None + if isinstance(init_a, str): + init_a_str = init_a.lower() + # Chose option if auto was chosen + if init_a.lower() == "auto": + init_a = "closed_form" + + if init_a.lower() == "closed_form": + groupwise_means, init_a, rmsd_a = closedform_beta2_glm_logitmean( + X=input_data.X, + design_loc=input_data.design_loc, + constraints_loc=input_data.constraints_loc.values, + size_factors=size_factors_init, + link_fn=lambda mean: np.log( + 1/(1/self.np_clip_param(mean, "mean")-1) + ) + ) + + # train mu, if the closed-form solution is inaccurate + self._train_loc = not (np.all(rmsd_a == 0) or rmsd_a.size == 0) + + + logging.getLogger("batchglm").debug("Using closed-form MME initialization for mean") + elif init_a.lower() == "standard": + if isinstance(input_data.X, SparseXArrayDataArray): + overall_means = input_data.X.mean(dim="observations") + else: + overall_means = input_data.X.mean(dim="observations").values # directly calculate the mean + overall_means = self.np_clip_param(overall_means, "mean") + + init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) + init_a[0, :] = np.log(overall_means/(1-overall_means)) + self._train_loc = True + + logging.getLogger("batchglm").debug("Using standard initialization for mean") + elif init_a.lower() == "all_zero": + init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) + self._train_loc = True + + logging.getLogger("batchglm").debug("Using all zero initialization for mean") + else: + raise ValueError("init_a string %s not recognized" % init_a) + logging.getLogger("batchglm").debug("Should train mean: %s", self._train_loc) + if isinstance(init_b, str): + if init_b.lower() == "auto": + init_b = "standard" + + if init_b.lower() == "standard": + groupwise_scales, init_b_intercept, rmsd_b = 
closedform_beta2_glm_logsamplesize( + X=input_data.X, + design_scale=input_data.design_scale[:, [0]], + constraints=input_data.constraints_scale[[0], [0]].values, + size_factors=size_factors_init, + groupwise_means=None, + link_fn=lambda samplesize: np.log(self.np_clip_param(samplesize, "samplesize")) + ) + init_b = np.zeros([input_data.num_scale_params, input_data.X.shape[1]]) + init_b[0, :] = init_b_intercept + + logging.getLogger("batchglm").debug("Using standard-form MME initialization for dispersion") + elif init_b.lower() == "closed_form": + dmats_unequal = False + if input_data.design_loc.shape[1] == input_data.design_scale.shape[1]: + if np.any(input_data.design_loc.values != input_data.design_scale.values): + dmats_unequal = True + + inits_unequal = False + if init_a_str is not None: + if init_a_str != init_b: + inits_unequal = True + + if inits_unequal or dmats_unequal: + raise ValueError("cannot use closed_form init for scale model " + + "if scale model differs from loc model") + + groupwise_scales, init_b, rmsd_b = closedform_beta2_glm_logsamplesize( + X=input_data.X, + design_scale=input_data.design_scale, + constraints=input_data.constraints_scale.values, + size_factors=size_factors_init, + groupwise_means=groupwise_means, + link_fn=lambda samplesize: np.log(self.np_clip_param(samplesize, "samplesize")) + ) + + logging.getLogger("batchglm").debug("Using closed-form MME initialization for dispersion") + elif init_b.lower() == "all_zero": + init_b = np.zeros([input_data.num_scale_params, input_data.X.shape[1]]) + + logging.getLogger("batchglm").debug("Using standard initialization for dispersion") + else: + raise ValueError("init_b string %s not recognized" % init_b) + logging.getLogger("batchglm").debug("Should train r: %s", self._train_scale) + else: + # Locations model: + if isinstance(init_a, str) and (init_a.lower() == "auto" or init_a.lower() == "init_model"): + my_loc_names = set(input_data.loc_names.values) + my_loc_names = my_loc_names.intersection(set(init_model.input_data.loc_names.values)) + + init_loc = np.zeros([input_data.num_loc_params, input_data.num_features]) + for parm in my_loc_names: + init_idx = np.where(init_model.input_data.loc_names == parm)[0] + my_idx = np.where(input_data.loc_names == parm)[0] + init_loc[my_idx] = init_model.a_var[init_idx] + + init_a = init_loc + logging.getLogger("batchglm").debug("Using initialization based on input model for mean") + + # Scale model: + if isinstance(init_b, str) and (init_b.lower() == "auto" or init_b.lower() == "init_model"): + my_scale_names = set(input_data.scale_names.values) + my_scale_names = my_scale_names.intersection(init_model.input_data.scale_names.values) + + init_scale = np.zeros([input_data.num_scale_params, input_data.num_features]) + for parm in my_scale_names: + init_idx = np.where(init_model.input_data.scale_names == parm)[0] + my_idx = np.where(input_data.scale_names == parm)[0] + init_scale[my_idx] = init_model.b_var[init_idx] + + init_b = init_scale + logging.getLogger("batchglm").debug("Using initialization based on input model for dispersion") + + return init_a, init_b + + @property + def input_data(self) -> InputData: + return self._input_data diff --git a/batchglm/train/tf/glm_beta2/estimator_graph.py b/batchglm/train/tf/glm_beta2/estimator_graph.py new file mode 100644 index 00000000..8e609600 --- /dev/null +++ b/batchglm/train/tf/glm_beta2/estimator_graph.py @@ -0,0 +1,12 @@ +import logging + +from .model import ProcessModel +from .external import EstimatorGraphAll + +logger = 
logging.getLogger(__name__) + + +class EstimatorGraph(ProcessModel, EstimatorGraphAll): + """ + Full class. + """ diff --git a/batchglm/train/tf/glm_beta2/external.py b/batchglm/train/tf/glm_beta2/external.py new file mode 100644 index 00000000..54facb78 --- /dev/null +++ b/batchglm/train/tf/glm_beta2/external.py @@ -0,0 +1,20 @@ +import batchglm.data as data_utils + +from batchglm.models.base.input import SparseXArrayDataSet, SparseXArrayDataArray +from batchglm.models.glm_beta2 import AbstractEstimator, EstimatorStoreXArray, InputData, Model +from batchglm.models.base_glm.utils import closedform_glm_mean, closedform_glm_scale +from batchglm.models.glm_beta2.utils import closedform_beta2_glm_logitmean, closedform_beta2_glm_logsamplesize + +import batchglm.train.tf.ops as op_utils +import batchglm.train.tf.train as train_utils +from batchglm.train.tf.base import TFEstimatorGraph, MonitoredTFEstimator + +from batchglm.train.tf.base_glm import GradientGraphGLM, NewtonGraphGLM, TrainerGraphGLM, EstimatorGraphGLM, FullDataModelGraphGLM, BasicModelGraphGLM +from batchglm.train.tf.base_glm import ESTIMATOR_PARAMS, ProcessModelGLM, ModelVarsGLM +from batchglm.train.tf.base_glm import HessiansGLM, FIMGLM, JacobiansGLM + +from batchglm.train.tf.base_glm_all import EstimatorAll, EstimatorGraphAll, FIMGLMALL, HessianGLMALL, JacobiansGLMALL, ReducableTensorsGLMALL + +import batchglm.utils.random as rand_utils +from batchglm.utils.linalg import groupwise_solve_lm +from batchglm import pkg_constants diff --git a/batchglm/train/tf/glm_beta2/fim.py b/batchglm/train/tf/glm_beta2/fim.py new file mode 100644 index 00000000..69817fbe --- /dev/null +++ b/batchglm/train/tf/glm_beta2/fim.py @@ -0,0 +1,25 @@ +import tensorflow as tf + +import logging + +from .external import FIMGLMALL + +logger = logging.getLogger(__name__) + + +class FIM(FIMGLMALL): + # No Fisher Information Matrices due to unsolvable E[log(X)] + + def _weight_fim_aa( + self, + loc, + scale + ): + assert False, "not implemented" + + def _weight_fim_bb( + self, + loc, + scale + ): + assert False, "not implemented" \ No newline at end of file diff --git a/batchglm/train/tf/glm_beta2/hessians.py b/batchglm/train/tf/glm_beta2/hessians.py new file mode 100644 index 00000000..066ebe4f --- /dev/null +++ b/batchglm/train/tf/glm_beta2/hessians.py @@ -0,0 +1,82 @@ +import tensorflow as tf + +import logging + +from .external import HessianGLMALL + +logger = logging.getLogger(__name__) + + +class Hessians(HessianGLMALL): + + def _weight_hessian_aa( + self, + X, + loc, + scale, + ): + one_minus_loc = tf.ones_like(loc) - loc + loc_times_scale = loc * scale + one_minus_loc_times_scale = one_minus_loc * scale + scalar_one = tf.constant(1, shape=(), dtype=self.dtype) + + if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): + const1 = tf.log(tf.sparse.to_dense(X) / -tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype))) + else: + const1 = tf.log(X / (tf.ones_like(X) - X)) + + const2 = (tf.ones_like(loc) - 2 * loc) * (- tf.digamma(loc_times_scale) + tf.digamma(one_minus_loc_times_scale) + const1) + const3 = loc * one_minus_loc_times_scale * (- tf.polygamma(scalar_one, loc_times_scale) - tf.polygamma(scalar_one, one_minus_loc_times_scale)) + const = loc * one_minus_loc_times_scale * (const2 + const3) + return const + + def _weight_hessian_ab( + self, + X, + loc, + scale, + ): + one_minus_loc = tf.ones_like(loc) - loc + loc_times_scale = loc * scale + one_minus_loc_times_scale = one_minus_loc * scale + scalar_one = tf.constant(1, 
shape=(), dtype=self.dtype) + + if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): + const1 = tf.log(tf.sparse.to_dense(X) / -tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype))) + else: + const1 = tf.log(X / (tf.ones_like(X) - X)) + + const2 = - tf.digamma(loc_times_scale) + tf.digamma(one_minus_loc_times_scale) + const1 + const3 = scale * (- tf.polygamma(scalar_one, loc_times_scale) * loc + one_minus_loc * tf.polygamma(scalar_one, one_minus_loc_times_scale)) + + const = loc * one_minus_loc_times_scale * (const2 + const3) + + return const + + def _weight_hessian_bb( + self, + X, + loc, + scale, + ): + one_minus_loc = tf.ones_like(loc) - loc + loc_times_scale = loc * scale + one_minus_loc_times_scale = one_minus_loc * scale + scalar_one = tf.constant(1, shape=(), dtype=self.dtype) + + if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): + const1 = tf.log(tf.sparse.to_dense(X) / -tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype))) + else: + const1 = tf.log(X / (tf.ones_like(X) - X)) + + const2 = loc * (tf.log(X) - tf.digamma(loc_times_scale))\ + - one_minus_loc * (tf.digamma(one_minus_loc_times_scale) + tf.log(const1)) \ + + tf.digamma(scale) + const3 = scale * (- tf.square(loc) * tf.polygamma(scalar_one, loc_times_scale)\ + + tf.polygamma(scalar_one, scale)\ + - tf.polygamma(scalar_one, one_minus_loc_times_scale) * tf.square(one_minus_loc)) + const = scale * (const2 + const3) + + return const + + diff --git a/batchglm/train/tf/glm_beta2/jacobians.py b/batchglm/train/tf/glm_beta2/jacobians.py new file mode 100644 index 00000000..1eec6172 --- /dev/null +++ b/batchglm/train/tf/glm_beta2/jacobians.py @@ -0,0 +1,42 @@ +import logging + +import tensorflow as tf + +from .external import JacobiansGLMALL + +logger = logging.getLogger(__name__) + + +class Jacobians(JacobiansGLMALL): + + def _weights_jac_a( + self, + X, + loc, + scale, + ): + one_minus_loc = tf.ones_like(loc) - loc + if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): + const1 = tf.log(tf.sparse.to_dense(X)/-tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype))) + else: + const1 = tf.log(X/(tf.ones_like(X)-X)) + const2 = - tf.digamma(loc*scale) + tf.digamma(one_minus_loc*scale) + const1 + const = const2 * scale * loc * one_minus_loc + return const + + def _weights_jac_b( + self, + X, + loc, + scale, + ): + if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): + one_minus_X = - tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype)) + Xdense = tf.sparse.to_dense(X) + else: + one_minus_X = tf.ones_like(X) - X + Xdense = X + one_minus_loc = tf.ones_like(X) - loc + const = scale * (tf.digamma(scale) - tf.digamma(loc*scale)*loc - tf.digamma(one_minus_loc*scale)*one_minus_loc + + loc * tf.log(Xdense) + one_minus_loc * tf.log(one_minus_X)) + return const diff --git a/batchglm/train/tf/glm_beta2/model.py b/batchglm/train/tf/glm_beta2/model.py new file mode 100644 index 00000000..a5e3f8cd --- /dev/null +++ b/batchglm/train/tf/glm_beta2/model.py @@ -0,0 +1,145 @@ +import logging + +import tensorflow as tf + +import numpy as np + +from .external import ProcessModelGLM, ModelVarsGLM, BasicModelGraphGLM +from .external import pkg_constants + +logger = logging.getLogger(__name__) + + +class ProcessModel(ProcessModelGLM): + + def param_bounds( + self, + dtype + ): + if isinstance(dtype, tf.DType): + dmin = dtype.min + dmax = dtype.max + dtype = dtype.as_numpy_dtype + else: + dtype = np.dtype(dtype) + dmin = 
np.finfo(dtype).min + dmax = np.finfo(dtype).max + dtype = dtype.type + + zero = np.nextafter(0, np.inf, dtype=dtype) + one = np.nextafter(1, -np.inf, dtype=dtype) + + sf = dtype(pkg_constants.ACCURACY_MARGIN_RELATIVE_TO_LIMIT) + bounds_min = { + #"a_var": np.log(zero/(1-zero)) / sf, + "a_var": dmin, + "b_var": np.log(zero) / sf, + #"eta_loc": np.log(zero/(1-zero)) / sf, + "eta_loc": dmin, + "eta_scale": np.log(zero) / sf, + "mean": zero, + "samplesize": zero, + "probs": dtype(0), + "log_probs": np.log(zero), + } + bounds_max = { + #"a_var": np.log(one/(1-one)) / sf, + "a_var": np.nextafter(np.log(one/(1-one)), -np.inf, dtype=dtype), + "b_var": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, + #"eta_loc": np.log(one/(1-one)) / sf, + "eta_loc": np.nextafter(np.log(one/(1-one)), -np.inf, dtype=dtype), + "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, + "mean": one, + "samplesize": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, + "probs": dtype(1), + "log_probs": dtype(0), + } + return bounds_min, bounds_max + + +class ModelVars(ProcessModel, ModelVarsGLM): + """ + Full class. + """ + + +class BasicModelGraph(ProcessModel, BasicModelGraphGLM): + + def __init__( + self, + X, + design_loc, + design_scale, + constraints_loc, + constraints_scale, + a_var, + b_var, + dtype, + size_factors=None + ): + a_var = self.tf_clip_param(a_var, "a_var") + b_var = self.tf_clip_param(b_var, "b_var") + + if constraints_loc is not None: + eta_loc = tf.matmul(design_loc, tf.matmul(constraints_loc, a_var)) + else: + eta_loc = tf.matmul(design_loc, a_var) + + eta_loc = self.tf_clip_param(eta_loc, "eta_loc") + + if constraints_scale is not None: + eta_scale = tf.matmul(design_scale, tf.matmul(constraints_scale, b_var)) + else: + eta_scale = tf.matmul(design_scale, b_var) + + eta_scale = self.tf_clip_param(eta_scale, "eta_scale") + + # Inverse linker functions: + model_loc = tf.ones_like(eta_loc)/(tf.ones_like(eta_loc)+tf.exp(-eta_loc)) + model_scale = tf.exp(eta_scale) + + # Log-likelihood: + if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): + one_minus_X = -tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=dtype)) + Xdense = tf.sparse.to_dense(X) + else: + one_minus_X = tf.ones_like(X) - X + Xdense = X + + one_minus_loc = tf.ones_like(model_loc) - model_loc + log_probs = tf.lgamma(model_scale) - tf.lgamma(model_loc * model_scale)\ + - tf.lgamma(one_minus_loc * model_scale)\ + + (model_scale * model_loc - tf.ones_like(model_loc)) * tf.log(Xdense)\ + + (one_minus_loc * model_scale - tf.ones_like(model_loc)) * tf.log(one_minus_X) + a = tf.print("log_probs: \n", log_probs) + b = tf.print("model_loc: \n", model_loc) + c = tf.print("model_scale: \n", model_scale) + d = tf.print("X: \n", X) + e = tf.print("a_var: \n", a_var) + f = tf.print("eta_loc: \n", eta_loc) + with tf.control_dependencies([a, b, c, d, e, f]): + log_probs = self.tf_clip_param(log_probs, "log_probs") + + # Variance: + sigma2 = (model_loc * one_minus_loc) / (tf.ones_like(model_loc) + model_scale) + + self.X = X + self.design_loc = design_loc + self.design_scale = design_scale + self.constraints_loc = constraints_loc + self.constraints_scale = constraints_scale + self.a_var = a_var + self.b_var = b_var + self.size_factors = size_factors + self.dtype = dtype + + self.eta_loc = eta_loc + self.eta_scale = eta_scale + self.model_loc = model_loc + self.model_scale = model_scale + self.mean = model_loc + self.samplesize = model_scale + + self.log_probs = log_probs + + self.sigma2 = sigma2 \ No newline at 
end of file diff --git a/batchglm/train/tf/glm_beta2/reducible_tensors.py b/batchglm/train/tf/glm_beta2/reducible_tensors.py new file mode 100644 index 00000000..a89103ea --- /dev/null +++ b/batchglm/train/tf/glm_beta2/reducible_tensors.py @@ -0,0 +1,13 @@ +import logging + +from .external import ReducableTensorsGLMALL +from .hessians import Hessians +from .jacobians import Jacobians +from .fim import FIM + +logger = logging.getLogger(__name__) + + +class ReducibleTensors(Jacobians, Hessians, FIM, ReducableTensorsGLMALL): + """ + """ diff --git a/batchglm/train/tf/glm_beta2/training_strategies.py b/batchglm/train/tf/glm_beta2/training_strategies.py new file mode 100644 index 00000000..9bd8b271 --- /dev/null +++ b/batchglm/train/tf/glm_beta2/training_strategies.py @@ -0,0 +1,37 @@ +from enum import Enum + +class TrainingStrategies(Enum): + + AUTO = None + DEFAULT = [ + { + "convergence_criteria": "all_converged_ll", + "stopping_criteria": 1e-8, + "use_batching": False, + "optim_algo": "nr_tr", + }, + ] + INEXACT = [ + { + "convergence_criteria": "all_converged_ll", + "stopping_criteria": 1e-6, + "use_batching": False, + "optim_algo": "nr_tr", + }, + ] + EXACT = [ + { + "convergence_criteria": "all_converged_ll", + "stopping_criteria": 1e-8, + "use_batching": False, + "optim_algo": "nr_tr", + }, + ] + IRLS = [ + { + "convergence_criteria": "all_converged_ll", + "stopping_criteria": 1e-8, + "use_batching": False, + "optim_algo": "irls_tr", + }, + ] \ No newline at end of file diff --git a/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py b/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py index e95bbc0d..415373c3 100644 --- a/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py +++ b/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py @@ -36,6 +36,8 @@ def __init__( from batchglm.api.models.glm_nb import Estimator, InputData elif noise_model=="norm": from batchglm.api.models.glm_norm import Estimator, InputData + elif noise_model=="beta2": + from batchglm.api.models.glm_beta2 import Estimator, InputData elif noise_model=="beta": from batchglm.api.models.glm_beta import Estimator, InputData elif noise_model=="bern": @@ -79,7 +81,7 @@ def estimate(self): "convergence_criteria": "all_converged_ll", "stopping_criteria": 1e-6, "use_batching": False, - "optim_algo": "irls_gd_tr", + "optim_algo": "gd", #"optim_algo": "nr_tr", }, ]) @@ -98,6 +100,9 @@ def eval_estimation_a( elif self.noise_model=="norm": threshold_dev = 1e-2 threshold_std = 1e-1 + elif self.noise_model=="beta2": + threshold_dev = 1e-2 + threshold_std = 1e-1 elif self.noise_model=="beta": threshold_dev = 1e-2 threshold_std = 1e-1 @@ -139,6 +144,9 @@ def eval_estimation_b( elif self.noise_model == "norm": threshold_dev = 1e-2 threshold_std = 1e-1 + elif self.noise_model == "beta2": + threshold_dev = 1e-2 + threshold_std = 1e-1 elif self.noise_model == "beta": threshold_dev = 1e-2 threshold_std = 1e-1 @@ -183,6 +191,8 @@ def get_simulator(self): from batchglm.api.models.glm_nb import Simulator elif self.noise_model=="norm": from batchglm.api.models.glm_norm import Simulator + elif self.noise_model=="beta2": + from batchglm.api.models.glm_beta2 import Simulator elif self.noise_model=="beta": from batchglm.api.models.glm_beta import Simulator elif self.noise_model=="bern": @@ -220,10 +230,14 @@ def simulate_complex(self): rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) rand_fn_loc = lambda shape: np.random.uniform(1, 3, shape) rand_fn_scale = lambda shape: np.random.uniform(1, 3, shape) - elif 
self.noise_model=="beta": + elif self.noise_model=="beta2": rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) rand_fn_loc = lambda shape: np.random.uniform(0.35, 0.45, shape) rand_fn_scale = lambda shape: np.random.uniform(10, 30, shape) + elif self.noise_model=="beta": + rand_fn_ave = lambda shape: np.random.uniform(10, 20, shape) + rand_fn_loc = lambda shape: np.random.uniform(10, 20, shape) + rand_fn_scale = lambda shape: np.random.uniform(10, 20, shape) elif self.noise_model=="bern": rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) rand_fn_loc = lambda shape: np.random.uniform(0.35, 0.45, shape) @@ -253,10 +267,14 @@ def simulate_easy(self): rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) rand_fn_loc = lambda shape: np.ones(shape) rand_fn_scale = lambda shape: np.random.uniform(1, 3, shape) - elif self.noise_model=="beta": + elif self.noise_model=="beta2": rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) rand_fn_loc = lambda shape: 0.5*np.ones(shape) rand_fn_scale = lambda shape: np.random.uniform(10, 30, shape) + elif self.noise_model=="beta": + rand_fn_ave = lambda shape: np.random.uniform(10, 20, shape) + rand_fn_loc = lambda shape: np.random.uniform(10, 20, shape) + rand_fn_scale = lambda shape: np.random.uniform(10, 30, shape) elif self.noise_model=="bern": rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) rand_fn_loc = lambda shape: 0.5*np.ones(shape) @@ -364,20 +382,20 @@ def test_a_standard_b_standard(self): self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") -class Test_AccuracyAnalytic_GLM_BETA( +class Test_AccuracyAnalytic_GLM_beta2( Test_AccuracyAnalytic_GLM_ALL, unittest.TestCase ): """ - Test whether optimizers yield exact results for beta distributed noise. + Test whether optimizers yield exact results for beta2 distributed noise. """ def test_a_closed_b_closed(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("Test_AccuracyAnalytic_GLM_BETA.test_a_closed_b_closed()") + logger.error("Test_AccuracyAnalytic_GLM_beta2.test_a_closed_b_closed()") - self.noise_model = "beta" + self.noise_model = "beta2" self.simulate_complex() self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") @@ -385,7 +403,35 @@ def test_a_closed_b_closed(self): def test_a_standard_b_standard(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("Test_AccuracyAnalytic_GLM_BETA.test_a_standard_b_standard()") + logger.error("Test_AccuracyAnalytic_GLM_beta2.test_a_standard_b_standard()") + + self.noise_model = "beta2" + self.simulate_easy() + self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") + self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") + +class Test_AccuracyAnalytic_GLM_beta( + Test_AccuracyAnalytic_GLM_ALL, + unittest.TestCase +): + """ + Test whether optimizers yield exact results for beta2 distributed noise. 
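For intuition about the beta2 cases above: the ranges drawn by rand_fn_ave and rand_fn_scale roughly translate into ordinary numpy beta draws once the (mean, samplesize) pair is mapped to the usual shape parameters. A minimal standalone sketch follows; the mapping a = mean * samplesize, b = (1 - mean) * samplesize is read off the beta2 log-likelihood earlier in this patch, and the array shapes and sample sizes here are arbitrary.

import numpy as np

# Sketch only: beta2-style data with per-feature mean and samplesize drawn from
# the same ranges as simulate_complex above.
n_obs, n_features = 2000, 3
mean = np.random.uniform(0.3, 0.4, [1, n_features])        # rand_fn_ave range
samplesize = np.random.uniform(10, 30, [1, n_features])    # rand_fn_scale range

a = mean * samplesize
b = (1.0 - mean) * samplesize
X = np.random.beta(a, b, size=[n_obs, n_features])

# Moment check: E[X] = mean and Var[X] = mean * (1 - mean) / (samplesize + 1),
# matching the sigma2 expression in the beta2 model graph.
print(X.mean(axis=0), mean)
print(X.var(axis=0), mean * (1 - mean) / (samplesize + 1))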
+ """ + + def test_a_closed_b_closed(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.INFO) + logger.error("Test_AccuracyAnalytic_GLM_beta.test_a_closed_b_closed()") + + self.noise_model = "beta" + self.simulate_complex() + self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") + #self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") + + def test_a_standard_b_standard(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.INFO) + logger.error("Test_AccuracyAnalytic_GLM_beta.test_a_standard_b_standard()") self.noise_model = "beta" self.simulate_easy() diff --git a/batchglm/unit_test/glm_all/test_graph_glm_all.py b/batchglm/unit_test/glm_all/test_graph_glm_all.py index 3f476815..872302f7 100644 --- a/batchglm/unit_test/glm_all/test_graph_glm_all.py +++ b/batchglm/unit_test/glm_all/test_graph_glm_all.py @@ -30,8 +30,10 @@ def __init__( from batchglm.api.models.glm_nb import Estimator, InputData elif noise_model=="norm": from batchglm.api.models.glm_norm import Estimator, InputData - elif noise_model=="beta": + elif noise_model == "beta": from batchglm.api.models.glm_beta import Estimator, InputData + elif noise_model=="beta2": + from batchglm.api.models.glm_beta2 import Estimator, InputData elif noise_model=="bern": from batchglm.api.models.glm_bern import Estimator, InputData else: @@ -103,6 +105,8 @@ def get_simulator(self): from batchglm.api.models.glm_nb import Simulator elif self.noise_model=="norm": from batchglm.api.models.glm_norm import Simulator + elif self.noise_model=="beta2": + from batchglm.api.models.glm_beta2 import Simulator elif self.noise_model=="beta": from batchglm.api.models.glm_beta import Simulator elif self.noise_model=="bern": @@ -198,7 +202,33 @@ def test_batched_norm(self): self._test_batched(sparse=False) self._test_batched(sparse=True) -class Test_Graph_GLM_BETA( +class Test_Graph_GLM_beta2( + Test_Graph_GLM_ALL, + unittest.TestCase +): + """ + Test whether training graphs work for beta2 distributed noise. 
+ """ + + def test_full_beta2(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logger.error("Test_Graph_GLM_beta2.test_full_beta2()") + + self.noise_model = "beta2" + self._test_full(sparse=False) + self._test_full(sparse=True) + + def test_batched_beta2(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logger.error("Test_Graph_GLM_beta2.test_batched_beta2()") + + self.noise_model = "beta2" + self._test_batched(sparse=False) + self._test_batched(sparse=True) + +class Test_Graph_GLM_beta( Test_Graph_GLM_ALL, unittest.TestCase ): @@ -209,20 +239,20 @@ class Test_Graph_GLM_BETA( def test_full_beta(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) - logger.error("Test_Graph_GLM_BETA.test_full_beta()") + logger.error("Test_Graph_GLM_beta.test_full_beta()") self.noise_model = "beta" self._test_full(sparse=False) - self._test_full(sparse=True) + #self._test_full(sparse=True) def test_batched_beta(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) - logger.error("Test_Graph_GLM_BETA.test_batched_beta()") + logger.error("Test_Graph_GLM_beta.test_batched_beta()") self.noise_model = "beta" self._test_batched(sparse=False) - self._test_batched(sparse=True) + #self._test_batched(sparse=True) class Test_Graph_GLM_BERN( diff --git a/batchglm/unit_test/glm_all/test_jacobians_glm_all.py b/batchglm/unit_test/glm_all/test_jacobians_glm_all.py index a8d7fb50..e016734f 100644 --- a/batchglm/unit_test/glm_all/test_jacobians_glm_all.py +++ b/batchglm/unit_test/glm_all/test_jacobians_glm_all.py @@ -28,6 +28,8 @@ def simulate(self): from batchglm.api.models.glm_nb import Simulator elif self.noise_model == "norm": from batchglm.api.models.glm_norm import Simulator + elif self.noise_model == "beta2": + from batchglm.api.models.glm_beta2 import Simulator elif self.noise_model == "beta": from batchglm.api.models.glm_beta import Simulator elif self.noise_model == "bern": @@ -53,6 +55,8 @@ def get_jacs( from batchglm.api.models.glm_nb import Estimator elif self.noise_model == "norm": from batchglm.api.models.glm_norm import Estimator + elif self.noise_model == "beta2": + from batchglm.api.models.glm_beta2 import Estimator elif self.noise_model == "beta": from batchglm.api.models.glm_beta import Estimator elif self.noise_model == "bern": @@ -100,6 +104,8 @@ def compare_jacs( from batchglm.api.models.glm_nb import InputData elif self.noise_model == "norm": from batchglm.api.models.glm_norm import InputData + elif self.noise_model == "beta2": + from batchglm.api.models.glm_beta2 import InputData elif self.noise_model == "beta": from batchglm.api.models.glm_beta import InputData elif self.noise_model == "bern": @@ -187,6 +193,17 @@ def test_compute_jacobians_norm(self): self._test_compute_jacobians(sparse=False) #self._test_compute_jacobians(sparse=True) #TODO automatic differentiation does not seem to work here yet. 
+class Test_Jacobians_GLM_beta2(Test_Jacobians_GLM_ALL, unittest.TestCase): + + def test_compute_jacobians_beta2(self): + logging.getLogger("tensorflow").setLevel(logging.INFO) + logging.getLogger("batchglm").setLevel(logging.INFO) + logging.getLogger("batchglm").error("Test_Jacobians_GLM_beta2.test_compute_jacobians_beta2()") + + self.noise_model = "beta2" + self._test_compute_jacobians(sparse=False) + #self._test_compute_jacobians(sparse=True) #TODO automatic differentiation does not seem to work here yet. + class Test_Jacobians_GLM_BETA(Test_Jacobians_GLM_ALL, unittest.TestCase): def test_compute_jacobians_beta(self): diff --git a/batchglm/utils/random.py b/batchglm/utils/random.py index af70dcaf..c00ed8ea 100644 --- a/batchglm/utils/random.py +++ b/batchglm/utils/random.py @@ -173,9 +173,9 @@ def sample(self, size=None): return random_data -class Beta: +class beta2: r""" - Beta distribution. + beta2 distribution. """ p: np.ndarray @@ -222,4 +222,35 @@ def sample(self, size=None): p=self.p, size=size ) - return random_data \ No newline at end of file + return random_data + + +class Beta: + r""" + Negative binomial distribution. + This class supports re-parameterising, sampling and calculation of + probabilities of negative binomial distributed data. + """ + + a: np.ndarray + b: np.ndarray + + def __init__(self, a=None, b=None): + self.a = a + self.b = b + + def sample(self, size=None): + """ + Sample from all distributions data of size `size`. + :param size: The size + :return: numpy array containing sampled data + + """ + # numpy uses an alternative parametrization + # see also https://en.wikipedia.org/wiki/Negative_binomial_distribution#Alternative_formulations + random_data = np.random.beta( + a=self.a, + b=self.b, + size=size + ) + return random_data From 977d44b2594937849d20ab78fdb8b78ed8dd1521 Mon Sep 17 00:00:00 2001 From: ina258 Date: Sat, 30 Mar 2019 19:39:20 +0100 Subject: [PATCH 07/12] some changes on new beta (mainly renaming mu and r and fixing initialisation) --- batchglm/api/utils/random.py | 2 +- batchglm/models/glm_beta/estimator.py | 2 +- batchglm/models/glm_beta/model.py | 11 +- batchglm/models/glm_beta/simulator.py | 8 +- batchglm/models/glm_beta/utils.py | 44 +++--- batchglm/models/glm_beta2/simulator.py | 2 +- .../train/tf/base_glm_all/external_beta.py | 2 +- batchglm/train/tf/glm_beta/estimator.py | 130 ++++++++---------- batchglm/train/tf/glm_beta/external.py | 2 +- batchglm/train/tf/glm_beta/model.py | 26 ++-- .../glm_all/test_acc_analytic_glm_all_2.py | 43 +++--- .../unit_test/glm_all/test_acc_glm_all.py | 37 +++++ .../unit_test/glm_all/test_graph_glm_all.py | 12 +- .../glm_all/test_hessians_glm_all.py | 21 +-- .../glm_all/test_jacobians_glm_all.py | 4 +- batchglm/utils/random.py | 8 +- 16 files changed, 194 insertions(+), 160 deletions(-) diff --git a/batchglm/api/utils/random.py b/batchglm/api/utils/random.py index 68256f3a..64bdcd88 100644 --- a/batchglm/api/utils/random.py +++ b/batchglm/api/utils/random.py @@ -1 +1 @@ -from batchglm.utils.random import NegativeBinomial, Normal, beta2, Bernoulli, Beta +from batchglm.utils.random import NegativeBinomial, Normal, Beta2, Bernoulli, Beta diff --git a/batchglm/models/glm_beta/estimator.py b/batchglm/models/glm_beta/estimator.py index 5c62ca1c..2e7457f1 100644 --- a/batchglm/models/glm_beta/estimator.py +++ b/batchglm/models/glm_beta/estimator.py @@ -7,7 +7,7 @@ class AbstractEstimator(Model, _Estimator_GLM, metaclass=abc.ABCMeta): r""" Estimator base class for generalized linear models (GLMs) with - negative 
binomial noise. + beta distributed noise. """ @classmethod diff --git a/batchglm/models/glm_beta/model.py b/batchglm/models/glm_beta/model.py index dba293f9..d7b7df3f 100644 --- a/batchglm/models/glm_beta/model.py +++ b/batchglm/models/glm_beta/model.py @@ -12,8 +12,8 @@ # Define distribution parameters: MODEL_PARAMS = MODEL_PARAMS.copy() MODEL_PARAMS.update({ - "mu": ("observations", "features"), - "r": ("observations", "features"), + "p": ("observations", "features"), + "q": ("observations", "features"), }) class Model(_Model_GLM, metaclass=abc.ABCMeta): @@ -44,17 +44,14 @@ def eta_loc(self) -> xr.DataArray: eta = self.design_loc.dot(self.par_link_loc, dims="design_loc_params") else: eta = np.matmul(self.design_loc.values, self.par_link_loc) - - if self.size_factors is not None: - eta += self.link_loc(np.expand_dims(self.size_factors, axis=1)) return eta @property - def mu(self) -> xr.DataArray: + def p(self) -> xr.DataArray: return self.location @property - def r(self) -> xr.DataArray: + def q(self) -> xr.DataArray: return self.scale diff --git a/batchglm/models/glm_beta/simulator.py b/batchglm/models/glm_beta/simulator.py index 5ed90c40..335a0757 100644 --- a/batchglm/models/glm_beta/simulator.py +++ b/batchglm/models/glm_beta/simulator.py @@ -6,7 +6,7 @@ class Simulator(_Simulator_GLM, Model): """ - Simulator for Generalized Linear Models (GLMs) with negative binomial noise. + Simulator for Generalized Linear Models (GLMs) with beta distributed noise. Uses the natural logarithm as linker function. """ @@ -25,9 +25,9 @@ def __init__( def generate_params( self, rand_fn_ave=lambda shape: np.random.uniform(10, 20, shape), - rand_fn=lambda shape: np.abs(np.random.uniform(10, 20, shape)), + rand_fn=lambda shape: np.random.uniform(10, 20, shape), rand_fn_loc=None, - rand_fn_scale=None, + rand_fn_scale=lambda shape: np.abs(np.random.uniform(40, 80, shape)), ): self._generate_params( self, @@ -43,5 +43,5 @@ def generate_data(self): """ self.data["X"] = ( self.param_shapes()["X"], - rand_utils.Beta(a=self.mu, b=self.r).sample() + rand_utils.Beta(p=self.p, q=self.q).sample() ) diff --git a/batchglm/models/glm_beta/utils.py b/batchglm/models/glm_beta/utils.py index ef125783..da660659 100644 --- a/batchglm/models/glm_beta/utils.py +++ b/batchglm/models/glm_beta/utils.py @@ -7,7 +7,7 @@ from .external import SparseXArrayDataArray -def closedform_beta_glm_logmu( +def closedform_beta_glm_logp( X: Union[xr.DataArray, SparseXArrayDataArray], design_loc, constraints_loc, @@ -18,7 +18,7 @@ def closedform_beta_glm_logmu( inv_link_fn=np.exp ): r""" - Calculates a closed-form solution for the `mu` parameters of negative-binomial GLMs. + Calculates a closed-form solution for the `p` parameters of beta GLMs. :param X: The sample data :param design_loc: design matrix for location @@ -26,10 +26,14 @@ def closedform_beta_glm_logmu( Tensor that encodes how complete parameter set which includes dependent parameters arises from indepedent parameters: all = . This form of constraints is used in vector generalized linear models (VGLMs). 
+ :param design_scale: design matrix for scale + :param constraints: some design constraints :param size_factors: size factors for X + :param link_fn: linker function for GLM + :param inv_link_fn: inverse linker function for GLM :return: tuple: (groupwise_means, mu, rmsd) """ - groupwise_means, mu, rmsd1 = closedform_glm_mean( + groupwise_means, m, rmsd1 = closedform_glm_mean( X=X, dmat=design_loc, constraints=constraints_loc, @@ -37,22 +41,24 @@ def closedform_beta_glm_logmu( link_fn=link_fn, inv_link_fn=inv_link_fn ) + mean = np.exp(m) - groupwise_scale, var, rmsd2 = closedform_glm_scale( + groupwise_scale, v, rmsd2 = closedform_glm_scale( X=X, design_scale=design_scale, constraints=constraints, size_factors=size_factors, - groupwise_means=groupwise_means, + groupwise_means=None, link_fn=link_fn, compute_scales_fun=None ) + var = np.exp(v) + p = mean / var * (mean * (1-mean) - var) + print("mean: \n", mean, "\n var: \n", var, "\n p: \n", p) + return groupwise_means, np.log(p), rmsd1 - mu = mu / var * (mu * (1-mu) - var) - return groupwise_means, mu, rmsd1 - -def closedform_beta_glm_logphi( +def closedform_beta_glm_logq( X: Union[xr.DataArray, SparseXArrayDataArray], design_loc, constraints_loc, @@ -63,7 +69,7 @@ def closedform_beta_glm_logphi( inv_link_fn=np.exp, ): r""" - Calculates a closed-form solution for the `mu` parameters of negative-binomial GLMs. + Calculates a closed-form solution for the `q` parameters of beta GLMs. :param X: The sample data :param design_loc: design matrix for location @@ -71,10 +77,14 @@ def closedform_beta_glm_logphi( Tensor that encodes how complete parameter set which includes dependent parameters arises from indepedent parameters: all = . This form of constraints is used in vector generalized linear models (VGLMs). 
+ :param design_scale: design matrix for scale + :param constraints: some design constraints :param size_factors: size factors for X + :param link_fn: linker function for GLM + :param inv_link_fn: inverse linker function for GLM :return: tuple: (groupwise_means, mu, rmsd) """ - groupwise_means, mu, rmsd1 = closedform_glm_mean( + groupwise_means, m, rmsd1 = closedform_glm_mean( X=X, dmat=design_loc, constraints=constraints_loc, @@ -82,16 +92,18 @@ def closedform_beta_glm_logphi( link_fn=link_fn, inv_link_fn=inv_link_fn ) + mean = np.exp(m) - groupwise_scale, var, rmsd2 = closedform_glm_scale( + groupwise_scale, v, rmsd2 = closedform_glm_scale( X=X, design_scale=design_scale, constraints=constraints, size_factors=size_factors, - groupwise_means=groupwise_means, + groupwise_means=None, link_fn=link_fn, - compute_scales_fun=None, + compute_scales_fun=None ) + var = np.exp(v) - var = (1 - mu) / var * (mu * (1 - mu) - var) - return groupwise_scale, var, rmsd2 + q = (1 - mean) / var * (mean * (1 - mean) - var) + return groupwise_scale, np.log(q), rmsd2 diff --git a/batchglm/models/glm_beta2/simulator.py b/batchglm/models/glm_beta2/simulator.py index 22558cd6..545543bc 100644 --- a/batchglm/models/glm_beta2/simulator.py +++ b/batchglm/models/glm_beta2/simulator.py @@ -43,5 +43,5 @@ def generate_data(self): """ self.data["X"] = ( self.param_shapes()["X"], - rand_utils.beta2(mean=self.mean, samplesize=self.samplesize).sample() + rand_utils.Beta2(mean=self.mean, samplesize=self.samplesize).sample() ) diff --git a/batchglm/train/tf/base_glm_all/external_beta.py b/batchglm/train/tf/base_glm_all/external_beta.py index 15eb60f2..5cc41515 100644 --- a/batchglm/train/tf/base_glm_all/external_beta.py +++ b/batchglm/train/tf/base_glm_all/external_beta.py @@ -3,4 +3,4 @@ from batchglm.train.tf.glm_beta import Hessians, FIM, Jacobians, ReducibleTensors from batchglm.models.glm_beta import AbstractEstimator, EstimatorStoreXArray, InputData, Model -from batchglm.models.glm_beta.utils import closedform_beta_glm_logmu, closedform_beta_glm_logphi \ No newline at end of file +from batchglm.models.glm_beta.utils import closedform_beta_glm_logp, closedform_beta_glm_logq \ No newline at end of file diff --git a/batchglm/train/tf/glm_beta/estimator.py b/batchglm/train/tf/glm_beta/estimator.py index 1f230534..34b14466 100644 --- a/batchglm/train/tf/glm_beta/estimator.py +++ b/batchglm/train/tf/glm_beta/estimator.py @@ -5,7 +5,7 @@ import tensorflow as tf from .external import AbstractEstimator, EstimatorAll, ESTIMATOR_PARAMS, InputData, Model -from .external import closedform_beta_glm_logmu, closedform_beta_glm_logphi +from .external import closedform_beta_glm_logp, closedform_beta_glm_logq from .external import SparseXArrayDataArray from .estimator_graph import EstimatorGraph from .model import ProcessModel @@ -16,7 +16,7 @@ class Estimator(EstimatorAll, AbstractEstimator, ProcessModel): """ - Estimator for Generalized Linear Models (GLMs) with negative binomial noise. + Estimator for Generalized Linear Models (GLMs) with beta noise. Uses the natural logarithm as linker function. 
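The closedform_beta_glm_logp / closedform_beta_glm_logq helpers above are the usual method-of-moments inversion for a Beta(p, q) variable: with m = E[X] and v = Var[X], p = m/v * (m(1-m) - v) and q = (1-m)/v * (m(1-m) - v). A standalone numpy check of that inversion (illustrative only; the sample size and parameter values are arbitrary):

import numpy as np

p_true, q_true = 12.0, 55.0
x = np.random.beta(p_true, q_true, size=200000)

m, v = x.mean(), x.var()
p_hat = m / v * (m * (1.0 - m) - v)
q_hat = (1.0 - m) / v * (m * (1.0 - m) - v)
print(p_hat, q_hat)  # close to (12, 55) for a large sample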
""" @@ -116,14 +116,15 @@ def __init__( ) init_a = init_a.astype(dtype) init_b = init_b.astype(dtype) - if quick_scale: - self._train_scale = False + + print("init_a: \n", np.exp(init_a)) + print("init_b: \n", np.exp(init_b)) if len(optim_algos) > 0: if np.any([x.lower() in ["nr", "nr_tr"] for x in optim_algos]): provide_hessian = True if np.any([x.lower() in ["irls", "irls_tr", "irls_gd", "irls_gd_tr"] for x in optim_algos]): - provide_fim = True + assert False, "Irls not possible for beta GLM" EstimatorAll.__init__( self=self, @@ -171,113 +172,92 @@ def init_par( """ size_factors_init = input_data.size_factors - if size_factors_init is not None: - size_factors_init = np.expand_dims(size_factors_init, axis=1) - size_factors_init = np.broadcast_to( - array=size_factors_init, - shape=[input_data.num_observations, input_data.num_features] - ) if init_model is None: - groupwise_means = None - init_a_str = None if isinstance(init_a, str): - init_a_str = init_a.lower() # Chose option if auto was chosen if init_a.lower() == "auto": init_a = "closed_form" - if init_a.lower() == "closed_form": - groupwise_means, init_a, rmsd_a = closedform_beta_glm_logmu( + groupwise_means, init_a, rmsd_a = closedform_beta_glm_logp( X=input_data.X, design_loc=input_data.design_loc, constraints_loc=input_data.constraints_loc.values, design_scale=input_data.design_scale, constraints=input_data.constraints_scale.values, size_factors=size_factors_init, - link_fn=lambda mu: np.log(self.np_clip_param(mu, "mu")) + link_fn=lambda p: np.log(self.np_clip_param(p, "p")) ) - # train mu, if the closed-form solution is inaccurate + # train p, if the closed-form solution is inaccurate self._train_loc = not (np.all(rmsd_a == 0) or rmsd_a.size == 0) - if input_data.size_factors is not None: - if np.any(input_data.size_factors != 1): - self._train_loc = True - - logger.debug("Using closed-form MLE initialization for mean") - logger.debug("Should train mu: %s", self._train_loc) - # elif init_a.lower() == "standard": - # if isinstance(input_data.X, SparseXArrayDataArray): - # overall_means = input_data.X.mean(dim="observations") - # else: - # overall_means = input_data.X.mean(dim="observations").values # directly calculate the mean - # overall_means = self.np_clip_param(overall_means, "mu") - # - # init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) - # init_a[0, :] = np.log(overall_means) - # self._train_loc = True - # - # logger.debug("Using standard initialization for mean") - # logger.debug("Should train mu: %s", self._train_loc) + logger.debug("Using closed-form MME initialization for p") + logger.debug("Should train p: %s", self._train_loc) + elif init_a.lower() == "standard": + groupwise_means, init_a_intercept, rmsd_a = closedform_beta_glm_logp( + X=input_data.X, + design_loc=input_data.design_loc[:, [0]], + constraints_loc=input_data.constraints_loc[[0], [0]].values, + design_scale=input_data.design_scale[:, [0]], + constraints=input_data.constraints_scale[[0], [0]].values, + size_factors=size_factors_init, + link_fn=lambda p: np.log(self.np_clip_param(p, "p")) + ) + init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) + init_a[0, :] = init_a_intercept + self._train_loc = True + + logger.debug("Using standard initialization for p") + logger.debug("Should train p: %s", self._train_loc) elif init_a.lower() == "all_zero": init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) self._train_loc = True - logger.debug("Using all_zero initialization for mean") - 
logger.debug("Should train mu: %s", self._train_loc) + logger.debug("Using all_zero initialization for p") + logger.debug("Should train p: %s", self._train_loc) else: raise ValueError("init_a string %s not recognized" % init_a) if isinstance(init_b, str): if init_b.lower() == "auto": - init_b = "standard" - - # if init_b.lower() == "standard": - # groupwise_scales, init_b_intercept, rmsd_b = closedform_beta_glm_logphi( - # X=input_data.X, - # design_scale=input_data.design_scale[:, [0]], - # constraints=input_data.constraints_scale[[0], [0]].values, - # size_factors=size_factors_init, - # groupwise_means=None, - # link_fn=lambda r: np.log(self.np_clip_param(r, "r")) - # ) - # init_b = np.zeros([input_data.num_scale_params, input_data.X.shape[1]]) - # init_b[0, :] = init_b_intercept - # - # logger.debug("Using standard-form MME initialization for dispersion") - # logger.debug("Should train r: %s", self._train_scale) - if init_b.lower() == "closed_form": - dmats_unequal = False - if input_data.design_loc.shape[1] == input_data.design_scale.shape[1]: - if np.any(input_data.design_loc.values != input_data.design_scale.values): - dmats_unequal = True - - inits_unequal = False - if init_a_str is not None: - if init_a_str != init_b: - inits_unequal = True - - if inits_unequal or dmats_unequal: - raise ValueError("cannot use closed_form init for scale model " + - "if scale model differs from loc model") - - groupwise_scales, init_b, rmsd_b = closedform_beta_glm_logphi( + init_b = "closed_form" + + if init_b.lower() == "standard": + groupwise_scales, init_b_intercept, rmsd_b = closedform_beta_glm_logq( + X=input_data.X, + design_loc=input_data.design_loc[:, [0]], + constraints_loc=input_data.constraints_loc[[0], [0]].values, + design_scale=input_data.design_scale[:, [0]], + constraints=input_data.constraints_scale[[0], [0]].values, + size_factors=size_factors_init, + link_fn=lambda q: np.log(self.np_clip_param(q, "q")) + ) + init_b = np.zeros([input_data.num_loc_params, input_data.num_features]) + init_b[0, :] = init_b_intercept + self._train_scale = True + + logger.debug("Using standard initialization for q") + logger.debug("Should train q: %s", self._train_loc) + elif init_b.lower() == "closed_form": + groupwise_scales, init_b, rmsd_b = closedform_beta_glm_logq( X=input_data.X, design_loc=input_data.design_loc, constraints_loc=input_data.constraints_loc.values, design_scale=input_data.design_scale, constraints=input_data.constraints_scale.values, size_factors=size_factors_init, - link_fn=lambda r: np.log(self.np_clip_param(r, "r")) + link_fn=lambda q: np.log(self.np_clip_param(q, "q")) ) + # train q, if the closed-form solution is inaccurate + self._train_scale = not (np.all(rmsd_b == 0) or rmsd_b.size == 0) - logger.debug("Using closed-form MME initialization for dispersion") - logger.debug("Should train r: %s", self._train_scale) + logger.debug("Using closed-form MME initialization for q") + logger.debug("Should train q: %s", self._train_scale) elif init_b.lower() == "all_zero": init_b = np.zeros([input_data.num_scale_params, input_data.X.shape[1]]) - logger.debug("Using standard initialization for dispersion") + logger.debug("Using all_zero initialization for q") logger.debug("Should train r: %s", self._train_scale) else: raise ValueError("init_b string %s not recognized" % init_b) diff --git a/batchglm/train/tf/glm_beta/external.py b/batchglm/train/tf/glm_beta/external.py index 2e3f2add..66eed360 100644 --- a/batchglm/train/tf/glm_beta/external.py +++ b/batchglm/train/tf/glm_beta/external.py 
@@ -3,7 +3,7 @@ from batchglm.models.base.input import SparseXArrayDataSet, SparseXArrayDataArray from batchglm.models.glm_beta import AbstractEstimator, EstimatorStoreXArray, InputData, Model from batchglm.models.base_glm.utils import closedform_glm_mean, closedform_glm_scale -from batchglm.models.glm_beta.utils import closedform_beta_glm_logmu, closedform_beta_glm_logphi +from batchglm.models.glm_beta.utils import closedform_beta_glm_logp, closedform_beta_glm_logq import batchglm.train.tf.ops as op_utils import batchglm.train.tf.train as train_utils diff --git a/batchglm/train/tf/glm_beta/model.py b/batchglm/train/tf/glm_beta/model.py index 69647650..b89a4023 100644 --- a/batchglm/train/tf/glm_beta/model.py +++ b/batchglm/train/tf/glm_beta/model.py @@ -32,8 +32,8 @@ def param_bounds( "b_var": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, "eta_loc": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, "eta_scale": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, - "mu": np.nextafter(0, np.inf, dtype=dtype), - "r": np.nextafter(0, np.inf, dtype=dtype), + "p": np.nextafter(0, np.inf, dtype=dtype), + "q": np.nextafter(0, np.inf, dtype=dtype), "probs": dtype(0), "log_probs": np.log(np.nextafter(0, np.inf, dtype=dtype)), } @@ -42,8 +42,8 @@ def param_bounds( "b_var": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, "eta_loc": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, - "mu": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, - "r": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, + "p": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, + "q": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, "probs": dtype(1), "log_probs": dtype(0), } @@ -78,9 +78,6 @@ def __init__( else: eta_loc = tf.matmul(design_loc, a_var) - if size_factors is not None: - eta_loc = tf.add(eta_loc, tf.log(size_factors)) - eta_loc = self.tf_clip_param(eta_loc, "eta_loc") if constraints_scale is not None: @@ -95,21 +92,22 @@ def __init__( model_scale = tf.exp(eta_scale) # Log-likelihood: - log_r_plus_mu = tf.log(model_scale + model_loc) + const1 = (model_loc - tf.ones_like(model_loc)) if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - Xdense = tf.sparse_to_dense(X) + const2 = X.__mul__(const1) one_minus_X = - tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype)) else: - Xdense = X + const2 = X * const1 one_minus_X = tf.ones_like(X)-X log_probs = tf.lgamma(model_loc+model_scale) - tf.lgamma(model_loc) - tf.lgamma(model_scale)\ - + (model_loc - tf.ones_like(model_loc)) * Xdense + (model_scale - tf.ones_like(model_scale)) * one_minus_X + + const2 + (model_scale - tf.ones_like(model_scale)) * one_minus_X log_probs = self.tf_clip_param(log_probs, "log_probs") # Variance: - sigma2 = (model_loc * model_scale) / ((model_loc + model_scale)**2 * (model_loc + model_scale + tf.ones_like(model_scale))) + sigma2 = (model_loc * model_scale) \ + / ((model_loc + model_scale)**2 * (model_loc + model_scale + tf.ones_like(model_scale))) self.X = X self.design_loc = design_loc @@ -125,8 +123,8 @@ def __init__( self.eta_scale = eta_scale self.model_loc = model_loc self.model_scale = model_scale - self.mu = model_loc - self.r = model_scale + self.p = model_loc + self.q = model_scale self.log_probs = log_probs diff --git a/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py b/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py index 415373c3..770f0242 100644 --- a/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py 
+++ b/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py @@ -47,8 +47,8 @@ def __init__( batch_size = 500 provide_optimizers = {"gd": True, "adam": True, "adagrad": True, "rmsprop": True, - "nr": True, "nr_tr": True, - "irls": True, "irls_gd": True, "irls_tr": True, "irls_gd_tr": True} + "nr": False, "nr_tr": False, + "irls": False, "irls_gd": False, "irls_tr": False, "irls_gd_tr": False} if sparse: input_data = InputData.new( @@ -69,6 +69,8 @@ def __init__( quick_scale=not train_scale, provide_optimizers=provide_optimizers, provide_batched=True, + provide_fim=False, + provide_hessian=False, init_a=init_a, init_b=init_b ) @@ -77,12 +79,9 @@ def estimate(self): self.estimator.initialize() self.estimator.train_sequence(training_strategy=[ { - "learning_rate": 1, - "convergence_criteria": "all_converged_ll", - "stopping_criteria": 1e-6, + "convergence_criteria": "all_converged", "use_batching": False, "optim_algo": "gd", - #"optim_algo": "nr_tr", }, ]) @@ -107,11 +106,14 @@ def eval_estimation_a( threshold_dev = 1e-2 threshold_std = 1e-1 elif self.noise_model=="bern": - threshold_dev = 1e-2 + threshold_dev = 1e-1 threshold_std = 1e-1 else: raise ValueError("noise_model not recognized") + print("estimation: \n", estimator_store.a) + print("simulator: \n", self.simulator.a) + if init_a == "standard": mean_dev = np.mean(estimator_store.a[0, :] - self.simulator.a[0, :]) std_dev = np.std(estimator_store.a[0, :] - self.simulator.a[0, :]) @@ -156,6 +158,9 @@ def eval_estimation_b( else: raise ValueError("noise_model not recognized") + print("estimation: \n", estimator_store.b) + print("simulator: \n", self.simulator.b) + if init_b == "standard": mean_dev = np.mean(estimator_store.b[0, :] - self.simulator.b[0, :]) std_dev = np.std(estimator_store.b[0, :] - self.simulator.b[0, :]) @@ -256,25 +261,30 @@ def simulate_easy(self): self.sim = self.get_simulator() self.sim.generate_sample_description(num_batches=1, num_conditions=2) + def rand_fn_standard(shape): + theta = np.ones(shape) + theta[0, :] = np.random.uniform(5, 20, shape[1]) + return theta + if self.noise_model is None: raise ValueError("noise_model is None") else: if self.noise_model=="nb": rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) rand_fn_loc = lambda shape: np.ones(shape) - rand_fn_scale = lambda shape: np.random.uniform(1, 3, shape) + rand_fn_scale = lambda shape: rand_fn_standard(shape) elif self.noise_model=="norm": rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) rand_fn_loc = lambda shape: np.ones(shape) - rand_fn_scale = lambda shape: np.random.uniform(1, 3, shape) + rand_fn_scale = lambda shape: rand_fn_standard(shape) elif self.noise_model=="beta2": rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) rand_fn_loc = lambda shape: 0.5*np.ones(shape) - rand_fn_scale = lambda shape: np.random.uniform(10, 30, shape) + rand_fn_scale = lambda shape: rand_fn_standard(shape) elif self.noise_model=="beta": rand_fn_ave = lambda shape: np.random.uniform(10, 20, shape) rand_fn_loc = lambda shape: np.random.uniform(10, 20, shape) - rand_fn_scale = lambda shape: np.random.uniform(10, 30, shape) + rand_fn_scale = lambda shape: rand_fn_standard(shape) elif self.noise_model=="bern": rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) rand_fn_loc = lambda shape: 0.5*np.ones(shape) @@ -282,15 +292,10 @@ def simulate_easy(self): else: raise ValueError("noise_model not recognized") - def rand_fn_standard(shape): - theta = np.ones(shape) - theta[0, :] = np.random.uniform(5, 20, 
shape[1]) - return theta - self.sim.generate_params( rand_fn_ave=rand_fn_ave, rand_fn_loc=rand_fn_loc, - rand_fn_scale=lambda shape: rand_fn_standard(shape) + rand_fn_scale=rand_fn_scale ) self.sim.generate_data() @@ -454,7 +459,7 @@ def test_a_closed_b_closed(self): self.noise_model = "bern" self.simulate_complex() self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") - self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") + #self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") def test_a_standard_b_standard(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) @@ -464,7 +469,7 @@ def test_a_standard_b_standard(self): self.noise_model = "bern" self.simulate_easy() self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") - self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") + #self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") if __name__ == '__main__': diff --git a/batchglm/unit_test/glm_all/test_acc_glm_all.py b/batchglm/unit_test/glm_all/test_acc_glm_all.py index 1377b4e8..cdab93e5 100644 --- a/batchglm/unit_test/glm_all/test_acc_glm_all.py +++ b/batchglm/unit_test/glm_all/test_acc_glm_all.py @@ -29,6 +29,8 @@ def __init__( from batchglm.api.models.glm_nb import Estimator, InputData elif noise_model=="norm": from batchglm.api.models.glm_norm import Estimator, InputData + elif noise_model=="bern": + from batchglm.api.models.glm_bern import Estimator, InputData else: raise ValueError("noise_model not recognized") @@ -102,6 +104,8 @@ def get_simulator(self): from batchglm.api.models.glm_nb import Simulator elif self.noise_model=="norm": from batchglm.api.models.glm_norm import Simulator + elif self.noise_model=="bern": + from batchglm.api.models.glm_bern import Simulator else: raise ValueError("noise_model not recognized") @@ -197,3 +201,36 @@ def test_batched_norm(self): if __name__ == '__main__': unittest.main() + + +class Test_Accuracy_GLM_BERN( + Test_Accuracy_GLM_ALL, + unittest.TestCase +): + """ + Test whether optimizers yield exact results for negative binomial noise. 
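For the Bernoulli case, the accuracy test boils down to recovering a logit-scale intercept from 0/1 data. A minimal numpy sketch of that recovery follows; the logit link is an assumption of this sketch, and the mean range mirrors rand_fn_ave used for "bern" above.

import numpy as np

n_obs, n_features = 5000, 4
mean = np.random.uniform(0.3, 0.4, [1, n_features])   # rand_fn_ave range for "bern"
X = np.random.binomial(1, mean, size=[n_obs, n_features]).astype(float)

observed_mean = X.mean(axis=0)
intercept_hat = np.log(observed_mean / (1.0 - observed_mean))   # logit of the observed mean
print(intercept_hat)
print(np.log(mean / (1.0 - mean)))   # ground truth on the same scale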
+ """ + + def test_full_bern(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logger.error("Test_Accuracy_GLM_NB.test_full_bern()") + + self.noise_model = "bern" + self.simulate() + self._test_full(sparse=False) + self._test_full(sparse=True) + + def test_batched_bern(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logger.error("Test_Accuracy_GLM_NB.test_batched_bern()") + + self.noise_model = "bern" + self.simulate() + self._test_batched(sparse=False) + self._test_batched(sparse=True) + + +if __name__ == '__main__': + unittest.main() diff --git a/batchglm/unit_test/glm_all/test_graph_glm_all.py b/batchglm/unit_test/glm_all/test_graph_glm_all.py index 872302f7..c6d5d9f4 100644 --- a/batchglm/unit_test/glm_all/test_graph_glm_all.py +++ b/batchglm/unit_test/glm_all/test_graph_glm_all.py @@ -202,7 +202,7 @@ def test_batched_norm(self): self._test_batched(sparse=False) self._test_batched(sparse=True) -class Test_Graph_GLM_beta2( +class Test_Graph_GLM_BETA2( Test_Graph_GLM_ALL, unittest.TestCase ): @@ -213,7 +213,7 @@ class Test_Graph_GLM_beta2( def test_full_beta2(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) - logger.error("Test_Graph_GLM_beta2.test_full_beta2()") + logger.error("Test_Graph_GLM_BETA2.test_full_beta2()") self.noise_model = "beta2" self._test_full(sparse=False) @@ -222,13 +222,13 @@ def test_full_beta2(self): def test_batched_beta2(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) - logger.error("Test_Graph_GLM_beta2.test_batched_beta2()") + logger.error("Test_Graph_GLM_BETA2.test_batched_beta2()") self.noise_model = "beta2" self._test_batched(sparse=False) self._test_batched(sparse=True) -class Test_Graph_GLM_beta( +class Test_Graph_GLM_BETA( Test_Graph_GLM_ALL, unittest.TestCase ): @@ -239,7 +239,7 @@ class Test_Graph_GLM_beta( def test_full_beta(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) - logger.error("Test_Graph_GLM_beta.test_full_beta()") + logger.error("Test_Graph_GLM_BETA.test_full_beta()") self.noise_model = "beta" self._test_full(sparse=False) @@ -248,7 +248,7 @@ def test_full_beta(self): def test_batched_beta(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) - logger.error("Test_Graph_GLM_beta.test_batched_beta()") + logger.error("Test_Graph_GLM_BETA.test_batched_beta()") self.noise_model = "beta" self._test_batched(sparse=False) diff --git a/batchglm/unit_test/glm_all/test_hessians_glm_all.py b/batchglm/unit_test/glm_all/test_hessians_glm_all.py index 4fadd79b..4a188deb 100644 --- a/batchglm/unit_test/glm_all/test_hessians_glm_all.py +++ b/batchglm/unit_test/glm_all/test_hessians_glm_all.py @@ -120,6 +120,11 @@ def _test_compute_hessians(self, sparse): design_loc = data_utils.design_matrix(sample_description, formula="~ 1 + condition + batch") design_scale = data_utils.design_matrix(sample_description, formula="~ 1 + condition") + print("design_loc: \n", design_loc) + print("design_scale: \n", design_scale) + print("sim.a_var: \n", sim.a_var) + print("sim.b_var: \n", sim.b_var) + if sparse: input_data = InputData.new( data=scipy.sparse.csr_matrix(sim.X), @@ -152,10 +157,10 @@ def _test_compute_hessians(self, sparse): 
logging.getLogger("batchglm").info("MAD: %f" % np.max(np.abs((h_tf - h_analytic)))) logging.getLogger("batchglm").info("MRAD: %f" % np.max(np.abs(h_tf - h_analytic))) - #i = 1 - #print(h_tf[i, :, :]) - #print(h_analytic[i, :, :]) - #print((h_tf[i, :, :] - h_analytic[i, :, :]) / h_tf[i, :, :]) + i = 1 + print(h_tf[i, :, :]) + print(h_analytic[i, :, :]) + print((h_tf[i, :, :] - h_analytic[i, :, :]) / h_tf[i, :, :]) # Make sure that hessians are not all zero which might make evaluation of equality difficult. assert np.sum(np.abs(h_analytic)) > 1e-10, \ @@ -192,12 +197,12 @@ def test_compute_hessians_norm(self): return True -class Test_Hessians_GLM_beta2(Test_Hessians_GLM_ALL, unittest.TestCase): +class Test_Hessians_GLM_BETA2(Test_Hessians_GLM_ALL, unittest.TestCase): def test_compute_hessians_beta2(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) - logging.getLogger("batchglm").error("Test_Hessians_GLM_beta2.test_compute_hessians_beta2()") + logging.getLogger("batchglm").error("Test_Hessians_GLM_BETA2.test_compute_hessians_beta2()") self.noise_model = "beta2" self._test_compute_hessians(sparse=False) @@ -205,12 +210,12 @@ def test_compute_hessians_beta2(self): return True -class Test_Hessians_GLM_beta(Test_Hessians_GLM_ALL, unittest.TestCase): +class Test_Hessians_GLM_BETA(Test_Hessians_GLM_ALL, unittest.TestCase): def test_compute_hessians_beta(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) - logging.getLogger("batchglm").error("Test_Hessians_GLM_beta.test_compute_hessians_beta()") + logging.getLogger("batchglm").error("Test_Hessians_GLM_BETA.test_compute_hessians_beta()") self.noise_model = "beta" self._test_compute_hessians(sparse=False) diff --git a/batchglm/unit_test/glm_all/test_jacobians_glm_all.py b/batchglm/unit_test/glm_all/test_jacobians_glm_all.py index e016734f..a2129fff 100644 --- a/batchglm/unit_test/glm_all/test_jacobians_glm_all.py +++ b/batchglm/unit_test/glm_all/test_jacobians_glm_all.py @@ -193,12 +193,12 @@ def test_compute_jacobians_norm(self): self._test_compute_jacobians(sparse=False) #self._test_compute_jacobians(sparse=True) #TODO automatic differentiation does not seem to work here yet. -class Test_Jacobians_GLM_beta2(Test_Jacobians_GLM_ALL, unittest.TestCase): +class Test_Jacobians_GLM_BETA2(Test_Jacobians_GLM_ALL, unittest.TestCase): def test_compute_jacobians_beta2(self): logging.getLogger("tensorflow").setLevel(logging.INFO) logging.getLogger("batchglm").setLevel(logging.INFO) - logging.getLogger("batchglm").error("Test_Jacobians_GLM_beta2.test_compute_jacobians_beta2()") + logging.getLogger("batchglm").error("Test_Jacobians_GLM_BETA2.test_compute_jacobians_beta2()") self.noise_model = "beta2" self._test_compute_jacobians(sparse=False) diff --git a/batchglm/utils/random.py b/batchglm/utils/random.py index c00ed8ea..7cf7d4b3 100644 --- a/batchglm/utils/random.py +++ b/batchglm/utils/random.py @@ -173,7 +173,7 @@ def sample(self, size=None): return random_data -class beta2: +class Beta2: r""" beta2 distribution. 
""" @@ -235,9 +235,9 @@ class Beta: a: np.ndarray b: np.ndarray - def __init__(self, a=None, b=None): - self.a = a - self.b = b + def __init__(self, p=None, q=None): + self.a = p + self.b = q def sample(self, size=None): """ From 6d55680b174d88e12ec710e74d7766679f94bb72 Mon Sep 17 00:00:00 2001 From: ina258 Date: Mon, 1 Apr 2019 13:53:00 +0200 Subject: [PATCH 08/12] changed fim for beta --- batchglm/train/tf/glm_beta/fim.py | 36 ++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/batchglm/train/tf/glm_beta/fim.py b/batchglm/train/tf/glm_beta/fim.py index c2cf1301..ee2db861 100644 --- a/batchglm/train/tf/glm_beta/fim.py +++ b/batchglm/train/tf/glm_beta/fim.py @@ -14,30 +14,40 @@ def _weight_fim_aa( loc, scale ): - scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - const = loc * (tf.digamma(loc + scale) - tf.digamma(loc) + loc * (tf.polygamma(scalar_one, loc + scale) - tf.polygamma(scalar_one, loc))) - const2 = const + loc * loc / (loc + scale) - - return const2 + return 0 def _weight_fim_bb( self, loc, scale ): - scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - const = scale * (tf.digamma(loc + scale) - tf.digamma(scale) + scale * ( - tf.polygamma(scalar_one, loc + scale) - tf.polygamma(scalar_one, scale))) - const2 = const + scale * scale / (loc + scale) - return const2 + return 0 - def _weight_fim_ab( + def _weight_fim( self, loc, scale ): scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - const = tf.polygamma(scalar_one, loc + scale) * loc * scale - return const \ No newline at end of file + # aa: + const1 = loc * (tf.digamma(loc + scale) - tf.digamma(loc) + loc * ( + tf.polygamma(scalar_one, loc + scale) - tf.polygamma(scalar_one, loc))) + aa_part = const1 + loc * loc / (loc + scale) + + # bb: + const2 = scale * (tf.digamma(loc + scale) - tf.digamma(scale) + scale * ( + tf.polygamma(scalar_one, loc + scale) - tf.polygamma(scalar_one, scale))) + bb_part = const2 + scale * scale / (loc + scale) + + # ab + ab_part = tf.polygamma(scalar_one, loc + scale) * loc * scale + + # should be 4 dimensional object, first two dimensions are dimensions of loc/scale, third and forth should be + # the dimensions of the [[aa, ab], [ab, bb]] matrices per element of loc/scale + # (aa, ab, bb scalars) + # not tested yet! 
+ full_fim = tf.stack([tf.stack([aa_part, ab_part], axis=2), tf.stack([ab_part, bb_part], axis=2)], axis=3) + + return full_fim \ No newline at end of file From d7a726991ce139e681891cba1dd49e9645aee62e Mon Sep 17 00:00:00 2001 From: ina258 Date: Mon, 1 Apr 2019 15:43:39 +0200 Subject: [PATCH 09/12] fixed bounds for probs and logprobs for continous distributions and corrected beta ll --- batchglm/models/glm_beta/simulator.py | 11 ++++++++--- batchglm/models/glm_beta/utils.py | 1 - batchglm/train/tf/glm_beta/estimator.py | 3 --- batchglm/train/tf/glm_beta/model.py | 24 ++++++++++++++++-------- batchglm/train/tf/glm_beta2/model.py | 4 ++-- batchglm/train/tf/glm_norm/model.py | 4 ++-- 6 files changed, 28 insertions(+), 19 deletions(-) diff --git a/batchglm/models/glm_beta/simulator.py b/batchglm/models/glm_beta/simulator.py index 335a0757..098f8bee 100644 --- a/batchglm/models/glm_beta/simulator.py +++ b/batchglm/models/glm_beta/simulator.py @@ -25,16 +25,21 @@ def __init__( def generate_params( self, rand_fn_ave=lambda shape: np.random.uniform(10, 20, shape), - rand_fn=lambda shape: np.random.uniform(10, 20, shape), + rand_fn=lambda shape: np.random.uniform(1, 1, shape), rand_fn_loc=None, - rand_fn_scale=lambda shape: np.abs(np.random.uniform(40, 80, shape)), + rand_fn_scale=None, ): + def fn_scale(shape): + theta = np.ones(shape) + theta[0, :] = np.random.uniform(40, 80, shape[1]) + return theta + self._generate_params( self, rand_fn_ave=rand_fn_ave, rand_fn=rand_fn, rand_fn_loc=rand_fn_loc, - rand_fn_scale=rand_fn_scale, + rand_fn_scale=fn_scale, ) def generate_data(self): diff --git a/batchglm/models/glm_beta/utils.py b/batchglm/models/glm_beta/utils.py index da660659..3ec81119 100644 --- a/batchglm/models/glm_beta/utils.py +++ b/batchglm/models/glm_beta/utils.py @@ -54,7 +54,6 @@ def closedform_beta_glm_logp( ) var = np.exp(v) p = mean / var * (mean * (1-mean) - var) - print("mean: \n", mean, "\n var: \n", var, "\n p: \n", p) return groupwise_means, np.log(p), rmsd1 diff --git a/batchglm/train/tf/glm_beta/estimator.py b/batchglm/train/tf/glm_beta/estimator.py index 34b14466..77b5c14c 100644 --- a/batchglm/train/tf/glm_beta/estimator.py +++ b/batchglm/train/tf/glm_beta/estimator.py @@ -117,9 +117,6 @@ def __init__( init_a = init_a.astype(dtype) init_b = init_b.astype(dtype) - print("init_a: \n", np.exp(init_a)) - print("init_b: \n", np.exp(init_b)) - if len(optim_algos) > 0: if np.any([x.lower() in ["nr", "nr_tr"] for x in optim_algos]): provide_hessian = True diff --git a/batchglm/train/tf/glm_beta/model.py b/batchglm/train/tf/glm_beta/model.py index b89a4023..b1bfd416 100644 --- a/batchglm/train/tf/glm_beta/model.py +++ b/batchglm/train/tf/glm_beta/model.py @@ -44,8 +44,8 @@ def param_bounds( "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, "p": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, "q": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, - "probs": dtype(1), - "log_probs": dtype(0), + "probs": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, + "log_probs": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, } return bounds_min, bounds_max @@ -92,19 +92,26 @@ def __init__( model_scale = tf.exp(eta_scale) # Log-likelihood: - const1 = (model_loc - tf.ones_like(model_loc)) if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - const2 = X.__mul__(const1) + Xdense = tf.sparse.to_dense(X) one_minus_X = - tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype)) else: - const2 = X * const1 + Xdense = X one_minus_X = tf.ones_like(X)-X + g = 
tf.lgamma(model_loc+model_scale) - tf.lgamma(model_loc) - tf.lgamma(model_scale) + l = (model_loc - tf.ones_like(model_loc)) * tf.log(Xdense) + (model_scale - tf.ones_like(model_scale)) * tf.log(one_minus_X) + log_probs = g + l - log_probs = tf.lgamma(model_loc+model_scale) - tf.lgamma(model_loc) - tf.lgamma(model_scale)\ - + const2 + (model_scale - tf.ones_like(model_scale)) * one_minus_X + a = tf.print("log_probs: \n", log_probs) log_probs = self.tf_clip_param(log_probs, "log_probs") + b = tf.print("log_probs geclipt: \n", log_probs) + c = tf.print("model_loc: \n", model_loc) + d = tf.print("model_scale: \n", model_scale) + # e = tf.print("first part: \n", h) + # f = tf.print("second part: \n", i) + # Variance: sigma2 = (model_loc * model_scale) \ / ((model_loc + model_scale)**2 * (model_loc + model_scale + tf.ones_like(model_scale))) @@ -126,6 +133,7 @@ def __init__( self.p = model_loc self.q = model_scale - self.log_probs = log_probs + with tf.control_dependencies([a,b,c,d]): + self.log_probs = log_probs self.sigma2 = sigma2 \ No newline at end of file diff --git a/batchglm/train/tf/glm_beta2/model.py b/batchglm/train/tf/glm_beta2/model.py index a5e3f8cd..170cf403 100644 --- a/batchglm/train/tf/glm_beta2/model.py +++ b/batchglm/train/tf/glm_beta2/model.py @@ -51,8 +51,8 @@ def param_bounds( "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, "mean": one, "samplesize": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, - "probs": dtype(1), - "log_probs": dtype(0), + "probs": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, + "log_probs": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, } return bounds_min, bounds_max diff --git a/batchglm/train/tf/glm_norm/model.py b/batchglm/train/tf/glm_norm/model.py index 7b7c2eb5..d0a84da9 100644 --- a/batchglm/train/tf/glm_norm/model.py +++ b/batchglm/train/tf/glm_norm/model.py @@ -44,8 +44,8 @@ def param_bounds( "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, "mean": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, "sd": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, - "probs": dtype(1), - "log_probs": dtype(0), + "probs": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, + "log_probs": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, } return bounds_min, bounds_max From dd7ace0472ee896b77935127f0a6aa2565103fc0 Mon Sep 17 00:00:00 2001 From: ina258 Date: Mon, 1 Apr 2019 16:41:51 +0200 Subject: [PATCH 10/12] last commit with (corrected) p-q-beta distribution --- batchglm/train/tf/glm_beta/fim.py | 34 ++----------------- batchglm/train/tf/glm_beta/hessians.py | 26 +++++++------- batchglm/train/tf/glm_beta/jacobians.py | 19 ++++++----- .../glm_all/test_hessians_glm_all.py | 13 +++---- 4 files changed, 31 insertions(+), 61 deletions(-) diff --git a/batchglm/train/tf/glm_beta/fim.py b/batchglm/train/tf/glm_beta/fim.py index ee2db861..69817fbe 100644 --- a/batchglm/train/tf/glm_beta/fim.py +++ b/batchglm/train/tf/glm_beta/fim.py @@ -8,46 +8,18 @@ class FIM(FIMGLMALL): + # No Fisher Information Matrices due to unsolvable E[log(X)] def _weight_fim_aa( self, loc, scale ): - return 0 + assert False, "not implemented" def _weight_fim_bb( self, loc, scale ): - - return 0 - - def _weight_fim( - self, - loc, - scale - ): - scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - - # aa: - const1 = loc * (tf.digamma(loc + scale) - tf.digamma(loc) + loc * ( - tf.polygamma(scalar_one, loc + scale) - tf.polygamma(scalar_one, loc))) - aa_part = const1 + loc * loc / (loc + scale) - - # bb: - const2 = scale * (tf.digamma(loc + 
scale) - tf.digamma(scale) + scale * ( - tf.polygamma(scalar_one, loc + scale) - tf.polygamma(scalar_one, scale))) - bb_part = const2 + scale * scale / (loc + scale) - - # ab - ab_part = tf.polygamma(scalar_one, loc + scale) * loc * scale - - # should be 4 dimensional object, first two dimensions are dimensions of loc/scale, third and forth should be - # the dimensions of the [[aa, ab], [ab, bb]] matrices per element of loc/scale - # (aa, ab, bb scalars) - # not tested yet! - full_fim = tf.stack([tf.stack([aa_part, ab_part], axis=2), tf.stack([ab_part, bb_part], axis=2)], axis=3) - - return full_fim \ No newline at end of file + assert False, "not implemented" \ No newline at end of file diff --git a/batchglm/train/tf/glm_beta/hessians.py b/batchglm/train/tf/glm_beta/hessians.py index a3419914..e2b71d03 100644 --- a/batchglm/train/tf/glm_beta/hessians.py +++ b/batchglm/train/tf/glm_beta/hessians.py @@ -26,15 +26,16 @@ def _weight_hessian_aa( loc, scale, ): - scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - const = loc * (tf.digamma(loc+scale) - tf.digamma(loc) + loc*(tf.polygamma(scalar_one, loc+scale) - tf.polygamma(scalar_one, loc))) if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - const1 = X.__mul__(loc) - const2 = tf.sparse.add(const1, const) + Xdense = tf.sparse.to_dense(X) else: - const2 = const + X * loc + Xdense = X + + scalar_one = tf.constant(1, shape=(), dtype=self.dtype) + const = loc * (tf.digamma(loc+scale) - tf.digamma(loc) + tf.log(Xdense) + + loc*(tf.polygamma(scalar_one, loc+scale) - tf.polygamma(scalar_one, loc))) - return const2 + return const def _weight_hessian_bb( self, @@ -42,13 +43,14 @@ def _weight_hessian_bb( loc, scale, ): - scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - const = scale * (tf.digamma(loc+scale) - tf.digamma(scale) + scale*(tf.polygamma(scalar_one, loc+scale) - tf.polygamma(scalar_one, scale))) if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - const1 = X.__mul__(scale) - const2 = tf.sparse.add(const1, const) + one_minus_X = - tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype)) else: - const2 = const + X * scale + one_minus_X = tf.ones_like(X) - X + + scalar_one = tf.constant(1, shape=(), dtype=self.dtype) + const = scale * (tf.digamma(loc + scale) - tf.digamma(scale) + tf.log(one_minus_X) + + scale * (tf.polygamma(scalar_one, loc+scale) - tf.polygamma(scalar_one, scale))) - return const2 + return const diff --git a/batchglm/train/tf/glm_beta/jacobians.py b/batchglm/train/tf/glm_beta/jacobians.py index 51d0d898..eae30d72 100644 --- a/batchglm/train/tf/glm_beta/jacobians.py +++ b/batchglm/train/tf/glm_beta/jacobians.py @@ -16,12 +16,12 @@ def _weights_jac_a( scale, ): if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - const = tf.sparse.add(X, tf.digamma(loc+scale) - tf.digamma(loc)) + Xdense = tf.sparse.to_dense(X) else: - const = tf.digamma(loc+scale) - tf.digamma(loc) + X - const1 = const * loc + Xdense = X + const = (tf.digamma(loc+scale) - tf.digamma(loc) + tf.log(Xdense)) * loc - return const1 + return const def _weights_jac_b( self, @@ -29,11 +29,12 @@ def _weights_jac_b( loc, scale, ): - # Pre-define sub-graphs that are used multiple times: if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - const = - tf.sparse_add(X, - tf.digamma(loc+scale) + tf.digamma(scale) -tf.ones(shape=X.dense_shape, dtype=self.dtype)) + one_minus_X = - tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype)) else: - const = 
tf.digamma(loc+scale) - tf.digamma(scale) + tf.ones_like(X) - X - const1 = const * scale + one_minus_X = tf.ones_like(X) - X - return const1 + + const = (tf.digamma(loc+scale) - tf.digamma(scale) + tf.log(one_minus_X)) * scale + + return const diff --git a/batchglm/unit_test/glm_all/test_hessians_glm_all.py b/batchglm/unit_test/glm_all/test_hessians_glm_all.py index 4a188deb..12428c93 100644 --- a/batchglm/unit_test/glm_all/test_hessians_glm_all.py +++ b/batchglm/unit_test/glm_all/test_hessians_glm_all.py @@ -118,12 +118,7 @@ def _test_compute_hessians(self, sparse): sample_description = data_utils.sample_description_from_xarray(sim.data, dim="observations") design_loc = data_utils.design_matrix(sample_description, formula="~ 1 + condition + batch") - design_scale = data_utils.design_matrix(sample_description, formula="~ 1 + condition") - - print("design_loc: \n", design_loc) - print("design_scale: \n", design_scale) - print("sim.a_var: \n", sim.a_var) - print("sim.b_var: \n", sim.b_var) + design_scale = data_utils.design_matrix(sample_description, formula="~ 1 + condition + batch") if sparse: input_data = InputData.new( @@ -158,9 +153,9 @@ def _test_compute_hessians(self, sparse): logging.getLogger("batchglm").info("MRAD: %f" % np.max(np.abs(h_tf - h_analytic))) i = 1 - print(h_tf[i, :, :]) - print(h_analytic[i, :, :]) - print((h_tf[i, :, :] - h_analytic[i, :, :]) / h_tf[i, :, :]) + print("\n h_tf: \n", h_tf[i, :, :]) + print("\n h_analytic: \n", h_analytic[i, :, :]) + print("\n difference: \n", (h_tf[i, :, :] - h_analytic[i, :, :])) # Make sure that hessians are not all zero which might make evaluation of equality difficult. assert np.sum(np.abs(h_analytic)) > 1e-10, \ From af5108c27ad43f206c5124eedae1601867763938 Mon Sep 17 00:00:00 2001 From: ina258 Date: Mon, 1 Apr 2019 16:52:46 +0200 Subject: [PATCH 11/12] removed new beta version as bug in old version is fixed --- batchglm/api/models/__init__.py | 1 - batchglm/api/models/glm_beta2.py | 2 - batchglm/api/utils/random.py | 2 +- batchglm/models/glm_beta/estimator.py | 2 +- batchglm/models/glm_beta/model.py | 17 +- batchglm/models/glm_beta/simulator.py | 21 +- batchglm/models/glm_beta/utils.py | 80 ++--- batchglm/models/glm_beta2/__init__.py | 4 - batchglm/models/glm_beta2/estimator.py | 30 -- batchglm/models/glm_beta2/external.py | 11 - batchglm/models/glm_beta2/model.py | 83 ----- batchglm/models/glm_beta2/simulator.py | 47 --- batchglm/models/glm_beta2/utils.py | 74 ----- batchglm/train/tf/base_glm_all/estimator.py | 4 - .../train/tf/base_glm_all/estimator_graph.py | 6 - .../train/tf/base_glm_all/external_beta.py | 2 +- .../train/tf/base_glm_all/external_beta2.py | 6 - .../tf/base_glm_all/reducible_tensors.py | 2 - batchglm/train/tf/glm_beta/estimator.py | 121 ++++--- batchglm/train/tf/glm_beta/external.py | 2 +- batchglm/train/tf/glm_beta/hessians.py | 52 ++- batchglm/train/tf/glm_beta/jacobians.py | 18 +- batchglm/train/tf/glm_beta/model.py | 68 ++-- .../train/tf/glm_beta/reducible_tensors.py | 2 +- .../train/tf/glm_beta/training_strategies.py | 30 +- batchglm/train/tf/glm_beta2/__init__.py | 7 - batchglm/train/tf/glm_beta2/estimator.py | 307 ------------------ .../train/tf/glm_beta2/estimator_graph.py | 12 - batchglm/train/tf/glm_beta2/external.py | 20 -- batchglm/train/tf/glm_beta2/fim.py | 25 -- batchglm/train/tf/glm_beta2/hessians.py | 82 ----- batchglm/train/tf/glm_beta2/jacobians.py | 42 --- batchglm/train/tf/glm_beta2/model.py | 145 --------- .../train/tf/glm_beta2/reducible_tensors.py | 13 - 
.../train/tf/glm_beta2/training_strategies.py | 37 --- .../glm_all/test_acc_analytic_glm_all_2.py | 58 +--- .../unit_test/glm_all/test_graph_glm_all.py | 29 -- .../glm_all/test_hessians_glm_all.py | 19 -- .../glm_all/test_jacobians_glm_all.py | 16 - batchglm/utils/random.py | 37 +-- 40 files changed, 232 insertions(+), 1304 deletions(-) delete mode 100644 batchglm/api/models/glm_beta2.py delete mode 100644 batchglm/models/glm_beta2/__init__.py delete mode 100644 batchglm/models/glm_beta2/estimator.py delete mode 100644 batchglm/models/glm_beta2/external.py delete mode 100644 batchglm/models/glm_beta2/model.py delete mode 100644 batchglm/models/glm_beta2/simulator.py delete mode 100644 batchglm/models/glm_beta2/utils.py delete mode 100644 batchglm/train/tf/base_glm_all/external_beta2.py delete mode 100644 batchglm/train/tf/glm_beta2/__init__.py delete mode 100644 batchglm/train/tf/glm_beta2/estimator.py delete mode 100644 batchglm/train/tf/glm_beta2/estimator_graph.py delete mode 100644 batchglm/train/tf/glm_beta2/external.py delete mode 100644 batchglm/train/tf/glm_beta2/fim.py delete mode 100644 batchglm/train/tf/glm_beta2/hessians.py delete mode 100644 batchglm/train/tf/glm_beta2/jacobians.py delete mode 100644 batchglm/train/tf/glm_beta2/model.py delete mode 100644 batchglm/train/tf/glm_beta2/reducible_tensors.py delete mode 100644 batchglm/train/tf/glm_beta2/training_strategies.py diff --git a/batchglm/api/models/__init__.py b/batchglm/api/models/__init__.py index e11f0a84..5a3142c9 100644 --- a/batchglm/api/models/__init__.py +++ b/batchglm/api/models/__init__.py @@ -1,5 +1,4 @@ from . import glm_nb from . import glm_norm -from . import glm_beta2 from . import glm_beta from . import glm_bern diff --git a/batchglm/api/models/glm_beta2.py b/batchglm/api/models/glm_beta2.py deleted file mode 100644 index 88d69651..00000000 --- a/batchglm/api/models/glm_beta2.py +++ /dev/null @@ -1,2 +0,0 @@ -from batchglm.models.glm_beta2 import InputData, Model, Simulator -from batchglm.train.tf.glm_beta2 import Estimator \ No newline at end of file diff --git a/batchglm/api/utils/random.py b/batchglm/api/utils/random.py index 64bdcd88..60cdae3e 100644 --- a/batchglm/api/utils/random.py +++ b/batchglm/api/utils/random.py @@ -1 +1 @@ -from batchglm.utils.random import NegativeBinomial, Normal, Beta2, Bernoulli, Beta +from batchglm.utils.random import NegativeBinomial, Normal, Bernoulli, Beta diff --git a/batchglm/models/glm_beta/estimator.py b/batchglm/models/glm_beta/estimator.py index 2e7457f1..034dfffe 100644 --- a/batchglm/models/glm_beta/estimator.py +++ b/batchglm/models/glm_beta/estimator.py @@ -7,7 +7,7 @@ class AbstractEstimator(Model, _Estimator_GLM, metaclass=abc.ABCMeta): r""" Estimator base class for generalized linear models (GLMs) with - beta distributed noise. + normal noise. """ @classmethod diff --git a/batchglm/models/glm_beta/model.py b/batchglm/models/glm_beta/model.py index d7b7df3f..4ac37fb5 100644 --- a/batchglm/models/glm_beta/model.py +++ b/batchglm/models/glm_beta/model.py @@ -12,13 +12,13 @@ # Define distribution parameters: MODEL_PARAMS = MODEL_PARAMS.copy() MODEL_PARAMS.update({ - "p": ("observations", "features"), - "q": ("observations", "features"), + "mean": ("observations", "features"), + "samplesize": ("observations", "features"), }) class Model(_Model_GLM, metaclass=abc.ABCMeta): """ - Generalized Linear Model (GLM) with negative binomial noise. + Generalized Linear Model (GLM) with beta distributed noise, logit link for location and log link for scale. 
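For orientation, the (mean, samplesize) parameterization used here corresponds to the standard beta shape parameters via a = mean * samplesize and b = (1 - mean) * samplesize, with variance mean * (1 - mean) / (samplesize + 1), consistent with the sigma2 and compute_scales_fun expressions in this parameterization. A small self-contained check with illustrative values (not part of the package):

import numpy as np
from scipy import stats
from scipy.special import gammaln

np.random.seed(0)
mean, samplesize = 0.3, 15.0                       # illustrative values
a, b = mean * samplesize, (1 - mean) * samplesize  # standard beta shape parameters

x = np.random.beta(a, b, size=100000)
assert np.isclose(x.mean(), mean, atol=1e-2)
assert np.isclose(x.var(), mean * (1 - mean) / (samplesize + 1), atol=1e-3)

# the lgamma-based log-likelihood written in the TF model agrees with scipy's beta logpdf
x0 = 0.42
ll = (gammaln(samplesize) - gammaln(a) - gammaln(b)
      + (a - 1) * np.log(x0) + (b - 1) * np.log(1 - x0))
assert np.isclose(ll, stats.beta.logpdf(x0, a, b))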
""" @classmethod @@ -26,10 +26,10 @@ def param_shapes(cls) -> dict: return MODEL_PARAMS def link_loc(self, data): - return np.log(data) + return np.log(1/(1/data-1)) def inverse_link_loc(self, data): - return np.exp(data) + return 1/(1+np.exp(-data)) def link_scale(self, data): return np.log(data) @@ -44,14 +44,17 @@ def eta_loc(self) -> xr.DataArray: eta = self.design_loc.dot(self.par_link_loc, dims="design_loc_params") else: eta = np.matmul(self.design_loc.values, self.par_link_loc) + + if self.size_factors is not None: + assert False, "size factors not allowed" return eta @property - def p(self) -> xr.DataArray: + def mean(self) -> xr.DataArray: return self.location @property - def q(self) -> xr.DataArray: + def samplesize(self) -> xr.DataArray: return self.scale diff --git a/batchglm/models/glm_beta/simulator.py b/batchglm/models/glm_beta/simulator.py index 098f8bee..7d9143d7 100644 --- a/batchglm/models/glm_beta/simulator.py +++ b/batchglm/models/glm_beta/simulator.py @@ -7,7 +7,7 @@ class Simulator(_Simulator_GLM, Model): """ Simulator for Generalized Linear Models (GLMs) with beta distributed noise. - Uses the natural logarithm as linker function. + Uses a logit-linker function for loc and a log-linker function for scale. """ def __init__( @@ -24,29 +24,24 @@ def __init__( def generate_params( self, - rand_fn_ave=lambda shape: np.random.uniform(10, 20, shape), - rand_fn=lambda shape: np.random.uniform(1, 1, shape), - rand_fn_loc=None, - rand_fn_scale=None, + rand_fn_ave=lambda shape: np.random.uniform(0.2, 0.8, shape), + rand_fn=None, + rand_fn_loc=lambda shape: np.random.uniform(0.5, 0.6, shape), + rand_fn_scale=lambda shape: np.random.uniform(1e1, 2*1e1, shape), ): - def fn_scale(shape): - theta = np.ones(shape) - theta[0, :] = np.random.uniform(40, 80, shape[1]) - return theta - self._generate_params( self, rand_fn_ave=rand_fn_ave, rand_fn=rand_fn, rand_fn_loc=rand_fn_loc, - rand_fn_scale=fn_scale, + rand_fn_scale=rand_fn_scale, ) def generate_data(self): """ - Sample random data based on negative binomial distribution and parameters. + Sample random data based on beta distribution and parameters. """ self.data["X"] = ( self.param_shapes()["X"], - rand_utils.Beta(p=self.p, q=self.q).sample() + rand_utils.Beta(mean=self.mean, samplesize=self.samplesize).sample() ) diff --git a/batchglm/models/glm_beta/utils.py b/batchglm/models/glm_beta/utils.py index 3ec81119..a3c8592d 100644 --- a/batchglm/models/glm_beta/utils.py +++ b/batchglm/models/glm_beta/utils.py @@ -1,39 +1,36 @@ +from copy import copy, deepcopy from typing import Union import numpy as np +import scipy.sparse import xarray as xr from .external import closedform_glm_mean, closedform_glm_scale +from .external import weighted_mean from .external import SparseXArrayDataArray -def closedform_beta_glm_logp( +def closedform_beta_glm_logitmean( X: Union[xr.DataArray, SparseXArrayDataArray], design_loc, constraints_loc, - design_scale: xr.DataArray, - constraints=None, size_factors=None, - link_fn=np.log, - inv_link_fn=np.exp + link_fn=lambda x: np.log(1/(1/x-1)), + inv_link_fn=lambda x: 1/(1+np.exp(-x)) ): r""" - Calculates a closed-form solution for the `p` parameters of beta GLMs. + Calculates a closed-form solution for the `mean` parameters of beta GLMs. 
:param X: The sample data :param design_loc: design matrix for location - :param constraints_loc: tensor (all parameters x dependent parameters) + :param constraints: tensor (all parameters x dependent parameters) Tensor that encodes how complete parameter set which includes dependent parameters arises from indepedent parameters: all = . This form of constraints is used in vector generalized linear models (VGLMs). - :param design_scale: design matrix for scale - :param constraints: some design constraints :param size_factors: size factors for X - :param link_fn: linker function for GLM - :param inv_link_fn: inverse linker function for GLM - :return: tuple: (groupwise_means, mu, rmsd) + :return: tuple: (groupwise_means, mean, rmsd) """ - groupwise_means, m, rmsd1 = closedform_glm_mean( + return closedform_glm_mean( X=X, dmat=design_loc, constraints=constraints_loc, @@ -41,68 +38,37 @@ def closedform_beta_glm_logp( link_fn=link_fn, inv_link_fn=inv_link_fn ) - mean = np.exp(m) - - groupwise_scale, v, rmsd2 = closedform_glm_scale( - X=X, - design_scale=design_scale, - constraints=constraints, - size_factors=size_factors, - groupwise_means=None, - link_fn=link_fn, - compute_scales_fun=None - ) - var = np.exp(v) - p = mean / var * (mean * (1-mean) - var) - return groupwise_means, np.log(p), rmsd1 -def closedform_beta_glm_logq( +def closedform_beta_glm_logsamplesize( X: Union[xr.DataArray, SparseXArrayDataArray], - design_loc, - constraints_loc, design_scale: xr.DataArray, constraints=None, size_factors=None, - link_fn=np.log, - inv_link_fn=np.exp, + groupwise_means=None, + link_fn=np.log ): r""" - Calculates a closed-form solution for the `q` parameters of beta GLMs. + Calculates a closed-form solution for the log-scale parameters of beta GLMs. :param X: The sample data - :param design_loc: design matrix for location - :param constraints_loc: tensor (all parameters x dependent parameters) - Tensor that encodes how complete parameter set which includes dependent - parameters arises from indepedent parameters: all = . - This form of constraints is used in vector generalized linear models (VGLMs). 
:param design_scale: design matrix for scale :param constraints: some design constraints :param size_factors: size factors for X - :param link_fn: linker function for GLM - :param inv_link_fn: inverse linker function for GLM - :return: tuple: (groupwise_means, mu, rmsd) + :param groupwise_means: optional, in case if already computed this can be specified to spare double-calculation + :return: tuple (groupwise_scales, logsd, rmsd) """ - groupwise_means, m, rmsd1 = closedform_glm_mean( - X=X, - dmat=design_loc, - constraints=constraints_loc, - size_factors=size_factors, - link_fn=link_fn, - inv_link_fn=inv_link_fn - ) - mean = np.exp(m) - groupwise_scale, v, rmsd2 = closedform_glm_scale( + def compute_scales_fun(variance, mean): + groupwise_scales = mean*(1-mean)/variance - 1 + return groupwise_scales + + return closedform_glm_scale( X=X, design_scale=design_scale, constraints=constraints, size_factors=size_factors, - groupwise_means=None, + groupwise_means=groupwise_means, link_fn=link_fn, - compute_scales_fun=None + compute_scales_fun=compute_scales_fun ) - var = np.exp(v) - - q = (1 - mean) / var * (mean * (1 - mean) - var) - return groupwise_scale, np.log(q), rmsd2 diff --git a/batchglm/models/glm_beta2/__init__.py b/batchglm/models/glm_beta2/__init__.py deleted file mode 100644 index efcf833d..00000000 --- a/batchglm/models/glm_beta2/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .model import Model, Model_XArray -from .external import InputData -from .simulator import Simulator -from .estimator import AbstractEstimator, EstimatorStoreXArray \ No newline at end of file diff --git a/batchglm/models/glm_beta2/estimator.py b/batchglm/models/glm_beta2/estimator.py deleted file mode 100644 index 034dfffe..00000000 --- a/batchglm/models/glm_beta2/estimator.py +++ /dev/null @@ -1,30 +0,0 @@ -import abc - -from .model import Model, Model_XArray -from .external import _Estimator_GLM, _EstimatorStore_XArray_GLM, ESTIMATOR_PARAMS - - -class AbstractEstimator(Model, _Estimator_GLM, metaclass=abc.ABCMeta): - r""" - Estimator base class for generalized linear models (GLMs) with - normal noise. - """ - - @classmethod - def param_shapes(cls) -> dict: - return ESTIMATOR_PARAMS - - -class EstimatorStoreXArray(_EstimatorStore_XArray_GLM, AbstractEstimator, Model_XArray): - - def __init__(self, estim: AbstractEstimator): - input_data = estim.input_data - # to_xarray triggers the get function of these properties and thereby - # causes evaluation of the properties that have not been computed during - # training, such as the hessian. 
- params = estim.to_xarray( - ["a_var", "b_var", "loss", "log_likelihood", "gradients", "fisher_inv"], - coords=input_data.data - ) - - Model_XArray.__init__(self, input_data, params) \ No newline at end of file diff --git a/batchglm/models/glm_beta2/external.py b/batchglm/models/glm_beta2/external.py deleted file mode 100644 index bb52b9f2..00000000 --- a/batchglm/models/glm_beta2/external.py +++ /dev/null @@ -1,11 +0,0 @@ -from batchglm.models.base import SparseXArrayDataArray, SparseXArrayDataSet -from batchglm.models.base_glm import _Estimator_GLM, _EstimatorStore_XArray_GLM, ESTIMATOR_PARAMS -from batchglm.models.base_glm import InputData, INPUT_DATA_PARAMS -from batchglm.models.base_glm import _Model_GLM, _Model_XArray_GLM, MODEL_PARAMS, _model_from_params -from batchglm.models.base_glm import _Simulator_GLM -from batchglm.models.base_glm import closedform_glm_mean, closedform_glm_scale - -import batchglm.data as data_utils -import batchglm.utils.random as rand_utils -from batchglm.utils.numeric import weighted_mean, weighted_variance -from batchglm.utils.linalg import groupwise_solve_lm \ No newline at end of file diff --git a/batchglm/models/glm_beta2/model.py b/batchglm/models/glm_beta2/model.py deleted file mode 100644 index 7c7d208f..00000000 --- a/batchglm/models/glm_beta2/model.py +++ /dev/null @@ -1,83 +0,0 @@ -import abc -try: - import anndata -except ImportError: - anndata = None -import xarray as xr -import numpy as np - -from .external import InputData -from .external import _Model_GLM, _Model_XArray_GLM, MODEL_PARAMS, _model_from_params - -# Define distribution parameters: -MODEL_PARAMS = MODEL_PARAMS.copy() -MODEL_PARAMS.update({ - "mean": ("observations", "features"), - "samplesize": ("observations", "features"), -}) - -class Model(_Model_GLM, metaclass=abc.ABCMeta): - """ - Generalized Linear Model (GLM) with beta2 distributed noise, logit link for location and log link for scale. 
- """ - - @classmethod - def param_shapes(cls) -> dict: - return MODEL_PARAMS - - def link_loc(self, data): - return np.log(1/(1/data-1)) - - def inverse_link_loc(self, data): - return 1/(1+np.exp(-data)) - - def link_scale(self, data): - return np.log(data) - - def inverse_link_scale(self, data): - return np.exp(data) - - @property - def eta_loc(self) -> xr.DataArray: - # TODO: take this switch out once xr.dataset slicing yields dataarray with loc_names coordinate: - if isinstance(self.par_link_loc, xr.DataArray): - eta = self.design_loc.dot(self.par_link_loc, dims="design_loc_params") - else: - eta = np.matmul(self.design_loc.values, self.par_link_loc) - - if self.size_factors is not None: - assert False, "size factors not allowed" - return eta - - @property - def mean(self) -> xr.DataArray: - return self.location - - @property - def samplesize(self) -> xr.DataArray: - return self.scale - - -def model_from_params(*args, **kwargs) -> Model: - (input_data, params) = _model_from_params(*args, **kwargs) - return Model_XArray(input_data, params) - - -class Model_XArray(_Model_XArray_GLM, Model): - _input_data: InputData - params: xr.Dataset - - def __init__(self, input_data: InputData, params: xr.Dataset): - super(_Model_XArray_GLM, self).__init__(input_data=input_data, params=params) - super(Model, self).__init__() - - def __str__(self): - return "[%s.%s object at %s]: data=%s" % ( - type(self).__module__, - type(self).__name__, - hex(id(self)), - self.params - ) - - def __repr__(self): - return self.__str__() diff --git a/batchglm/models/glm_beta2/simulator.py b/batchglm/models/glm_beta2/simulator.py deleted file mode 100644 index 545543bc..00000000 --- a/batchglm/models/glm_beta2/simulator.py +++ /dev/null @@ -1,47 +0,0 @@ -import numpy as np - -from .model import Model -from .external import rand_utils, _Simulator_GLM - - -class Simulator(_Simulator_GLM, Model): - """ - Simulator for Generalized Linear Models (GLMs) with beta2 distributed noise. - Uses a logit-linker function for loc and a log-linker function for scale. - """ - - def __init__( - self, - num_observations=1000, - num_features=100 - ): - Model.__init__(self) - _Simulator_GLM.__init__( - self, - num_observations=num_observations, - num_features=num_features - ) - - def generate_params( - self, - rand_fn_ave=lambda shape: np.random.uniform(0.2, 0.8, shape), - rand_fn=None, - rand_fn_loc=lambda shape: np.random.uniform(0.5, 0.6, shape), - rand_fn_scale=lambda shape: np.random.uniform(1e1, 2*1e1, shape), - ): - self._generate_params( - self, - rand_fn_ave=rand_fn_ave, - rand_fn=rand_fn, - rand_fn_loc=rand_fn_loc, - rand_fn_scale=rand_fn_scale, - ) - - def generate_data(self): - """ - Sample random data based on beta2 distribution and parameters. 
- """ - self.data["X"] = ( - self.param_shapes()["X"], - rand_utils.Beta2(mean=self.mean, samplesize=self.samplesize).sample() - ) diff --git a/batchglm/models/glm_beta2/utils.py b/batchglm/models/glm_beta2/utils.py deleted file mode 100644 index 025621d9..00000000 --- a/batchglm/models/glm_beta2/utils.py +++ /dev/null @@ -1,74 +0,0 @@ -from copy import copy, deepcopy -from typing import Union - -import numpy as np -import scipy.sparse -import xarray as xr - -from .external import closedform_glm_mean, closedform_glm_scale -from .external import weighted_mean -from .external import SparseXArrayDataArray - - -def closedform_beta2_glm_logitmean( - X: Union[xr.DataArray, SparseXArrayDataArray], - design_loc, - constraints_loc, - size_factors=None, - link_fn=lambda x: np.log(1/(1/x-1)), - inv_link_fn=lambda x: 1/(1+np.exp(-x)) -): - r""" - Calculates a closed-form solution for the `mean` parameters of beta2 GLMs. - - :param X: The sample data - :param design_loc: design matrix for location - :param constraints: tensor (all parameters x dependent parameters) - Tensor that encodes how complete parameter set which includes dependent - parameters arises from indepedent parameters: all = . - This form of constraints is used in vector generalized linear models (VGLMs). - :param size_factors: size factors for X - :return: tuple: (groupwise_means, mean, rmsd) - """ - return closedform_glm_mean( - X=X, - dmat=design_loc, - constraints=constraints_loc, - size_factors=size_factors, - link_fn=link_fn, - inv_link_fn=inv_link_fn - ) - - -def closedform_beta2_glm_logsamplesize( - X: Union[xr.DataArray, SparseXArrayDataArray], - design_scale: xr.DataArray, - constraints=None, - size_factors=None, - groupwise_means=None, - link_fn=np.log -): - r""" - Calculates a closed-form solution for the log-scale parameters of beta2 GLMs. 
- - :param X: The sample data - :param design_scale: design matrix for scale - :param constraints: some design constraints - :param size_factors: size factors for X - :param groupwise_means: optional, in case if already computed this can be specified to spare double-calculation - :return: tuple (groupwise_scales, logsd, rmsd) - """ - - def compute_scales_fun(variance, mean): - groupwise_scales = mean*(1-mean)/variance - 1 - return groupwise_scales - - return closedform_glm_scale( - X=X, - design_scale=design_scale, - constraints=constraints, - size_factors=size_factors, - groupwise_means=groupwise_means, - link_fn=link_fn, - compute_scales_fun=compute_scales_fun - ) diff --git a/batchglm/train/tf/base_glm_all/estimator.py b/batchglm/train/tf/base_glm_all/estimator.py index 51ca565e..3b21219f 100644 --- a/batchglm/train/tf/base_glm_all/estimator.py +++ b/batchglm/train/tf/base_glm_all/estimator.py @@ -71,8 +71,6 @@ def __init__( from .external_nb import EstimatorGraph elif noise_model == "norm": from .external_norm import EstimatorGraph - elif noise_model == "beta2": - from .external_beta2 import EstimatorGraph elif noise_model == "beta": from .external_beta import EstimatorGraph elif noise_model == "bern": @@ -358,8 +356,6 @@ def finalize(self): from .external_nb import EstimatorStoreXArray elif self.noise_model == "norm": from .external_norm import EstimatorStoreXArray - elif self.noise_model == "beta2": - from .external_beta2 import EstimatorStoreXArray elif self.noise_model == "beta": from .external_beta import EstimatorStoreXArray elif self.noise_model == "bern": diff --git a/batchglm/train/tf/base_glm_all/estimator_graph.py b/batchglm/train/tf/base_glm_all/estimator_graph.py index 52f36f7c..9b360cd7 100644 --- a/batchglm/train/tf/base_glm_all/estimator_graph.py +++ b/batchglm/train/tf/base_glm_all/estimator_graph.py @@ -64,8 +64,6 @@ def __init__( from .external_nb import ReducibleTensors elif noise_model == "norm": from .external_norm import ReducibleTensors - elif noise_model == "beta2": - from .external_beta2 import ReducibleTensors elif noise_model == "beta": from .external_beta import ReducibleTensors elif noise_model == "bern": @@ -254,8 +252,6 @@ def __init__( from .external_nb import ReducibleTensors elif noise_model == "norm": from .external_norm import ReducibleTensors - elif noise_model == "beta2": - from .external_beta2 import ReducibleTensors elif noise_model == "beta": from .external_beta import ReducibleTensors elif noise_model == "bern": @@ -439,8 +435,6 @@ def __init__( from .external_nb import ModelVars elif noise_model == "norm": from .external_norm import ModelVars - elif noise_model == "beta2": - from .external_beta2 import ModelVars elif noise_model == "beta": from .external_beta import ModelVars elif noise_model == "bern": diff --git a/batchglm/train/tf/base_glm_all/external_beta.py b/batchglm/train/tf/base_glm_all/external_beta.py index 5cc41515..466cd3d0 100644 --- a/batchglm/train/tf/base_glm_all/external_beta.py +++ b/batchglm/train/tf/base_glm_all/external_beta.py @@ -3,4 +3,4 @@ from batchglm.train.tf.glm_beta import Hessians, FIM, Jacobians, ReducibleTensors from batchglm.models.glm_beta import AbstractEstimator, EstimatorStoreXArray, InputData, Model -from batchglm.models.glm_beta.utils import closedform_beta_glm_logp, closedform_beta_glm_logq \ No newline at end of file +from batchglm.models.glm_beta.utils import closedform_beta_glm_logitmean, closedform_beta_glm_logsamplesize \ No newline at end of file diff --git 
a/batchglm/train/tf/base_glm_all/external_beta2.py b/batchglm/train/tf/base_glm_all/external_beta2.py deleted file mode 100644 index efdcbb2f..00000000 --- a/batchglm/train/tf/base_glm_all/external_beta2.py +++ /dev/null @@ -1,6 +0,0 @@ -from batchglm.train.tf.glm_beta2 import EstimatorGraph -from batchglm.train.tf.glm_beta2 import BasicModelGraph, ModelVars, ProcessModel -from batchglm.train.tf.glm_beta2 import Hessians, FIM, Jacobians, ReducibleTensors - -from batchglm.models.glm_beta2 import AbstractEstimator, EstimatorStoreXArray, InputData, Model -from batchglm.models.glm_beta2.utils import closedform_beta2_glm_logitmean, closedform_beta2_glm_logsamplesize \ No newline at end of file diff --git a/batchglm/train/tf/base_glm_all/reducible_tensors.py b/batchglm/train/tf/base_glm_all/reducible_tensors.py index 558c6870..dbe20689 100644 --- a/batchglm/train/tf/base_glm_all/reducible_tensors.py +++ b/batchglm/train/tf/base_glm_all/reducible_tensors.py @@ -35,8 +35,6 @@ def assemble_tensors(self, idx, data): from .external_nb import BasicModelGraph elif self.noise_model == "norm": from .external_norm import BasicModelGraph - elif self.noise_model == "beta2": - from .external_beta2 import BasicModelGraph elif self.noise_model == "beta": from .external_beta import BasicModelGraph elif self.noise_model == "bern": diff --git a/batchglm/train/tf/glm_beta/estimator.py b/batchglm/train/tf/glm_beta/estimator.py index 77b5c14c..a927dd50 100644 --- a/batchglm/train/tf/glm_beta/estimator.py +++ b/batchglm/train/tf/glm_beta/estimator.py @@ -5,7 +5,7 @@ import tensorflow as tf from .external import AbstractEstimator, EstimatorAll, ESTIMATOR_PARAMS, InputData, Model -from .external import closedform_beta_glm_logp, closedform_beta_glm_logq +from .external import closedform_beta_glm_logitmean, closedform_beta_glm_logsamplesize from .external import SparseXArrayDataArray from .estimator_graph import EstimatorGraph from .model import ProcessModel @@ -16,8 +16,8 @@ class Estimator(EstimatorAll, AbstractEstimator, ProcessModel): """ - Estimator for Generalized Linear Models (GLMs) with beta noise. - Uses the natural logarithm as linker function. + Estimator for Generalized Linear Models (GLMs) with beta distributed noise. + Uses a logit linker function for loc and log linker function for scale. 
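The intercept initializations that init_par computes further down can be summarized in plain NumPy; this is a minimal sketch assuming a dense (observations x features) array X with values in (0, 1), not the actual InputData / design-matrix handling:

import numpy as np

np.random.seed(0)
X = np.random.beta(2.0, 5.0, size=(200, 3))        # stand-in for the data matrix

overall_means = X.mean(axis=0)
init_a_intercept = np.log(overall_means / (1 - overall_means))        # logit link for the mean

overall_vars = X.var(axis=0)
overall_samplesize = overall_means * (1 - overall_means) / overall_vars - 1
init_b_intercept = np.log(overall_samplesize)                         # log link for the samplesize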
""" def __init__( @@ -26,8 +26,8 @@ def __init__( batch_size: int = 500, graph: tf.Graph = None, init_model: Model = None, - init_a: Union[np.ndarray, str] = "closed_form", - init_b: Union[np.ndarray, str] = "closed_form", + init_a: Union[np.ndarray, str] = "AUTO", + init_b: Union[np.ndarray, str] = "AUTO", quick_scale: bool = False, model: EstimatorGraph = None, provide_optimizers: dict = { @@ -37,10 +37,10 @@ def __init__( "rmsprop": True, "nr": True, "nr_tr": True, - "irls": True, - "irls_gd": True, - "irls_tr": True, - "irls_gd_tr": True, + "irls": False, + "irls_gd": False, + "irls_tr": False, + "irls_gd_tr": False, }, provide_batched: bool = False, provide_fim: bool = False, @@ -116,12 +116,19 @@ def __init__( ) init_a = init_a.astype(dtype) init_b = init_b.astype(dtype) + if quick_scale: + self._train_scale = False + + print("init_a") + print(init_a) + print("init_b") + print(init_b) if len(optim_algos) > 0: if np.any([x.lower() in ["nr", "nr_tr"] for x in optim_algos]): provide_hessian = True - if np.any([x.lower() in ["irls", "irls_tr", "irls_gd", "irls_gd_tr"] for x in optim_algos]): - assert False, "Irls not possible for beta GLM" + if np.any([x.lower() in ["irls", "irls_tr"] for x in optim_algos]): + provide_fim = True EstimatorAll.__init__( self=self, @@ -171,93 +178,99 @@ def init_par( size_factors_init = input_data.size_factors if init_model is None: + groupwise_means = None + init_a_str = None if isinstance(init_a, str): + init_a_str = init_a.lower() # Chose option if auto was chosen if init_a.lower() == "auto": init_a = "closed_form" + if init_a.lower() == "closed_form": - groupwise_means, init_a, rmsd_a = closedform_beta_glm_logp( + groupwise_means, init_a, rmsd_a = closedform_beta_glm_logitmean( X=input_data.X, design_loc=input_data.design_loc, constraints_loc=input_data.constraints_loc.values, - design_scale=input_data.design_scale, - constraints=input_data.constraints_scale.values, size_factors=size_factors_init, - link_fn=lambda p: np.log(self.np_clip_param(p, "p")) + link_fn=lambda mean: np.log( + 1/(1/self.np_clip_param(mean, "mean")-1) + ) ) - # train p, if the closed-form solution is inaccurate + # train mu, if the closed-form solution is inaccurate self._train_loc = not (np.all(rmsd_a == 0) or rmsd_a.size == 0) - logger.debug("Using closed-form MME initialization for p") - logger.debug("Should train p: %s", self._train_loc) + + logging.getLogger("batchglm").debug("Using closed-form MME initialization for mean") elif init_a.lower() == "standard": - groupwise_means, init_a_intercept, rmsd_a = closedform_beta_glm_logp( - X=input_data.X, - design_loc=input_data.design_loc[:, [0]], - constraints_loc=input_data.constraints_loc[[0], [0]].values, - design_scale=input_data.design_scale[:, [0]], - constraints=input_data.constraints_scale[[0], [0]].values, - size_factors=size_factors_init, - link_fn=lambda p: np.log(self.np_clip_param(p, "p")) - ) + if isinstance(input_data.X, SparseXArrayDataArray): + overall_means = input_data.X.mean(dim="observations") + else: + overall_means = input_data.X.mean(dim="observations").values # directly calculate the mean + overall_means = self.np_clip_param(overall_means, "mean") + init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) - init_a[0, :] = init_a_intercept + init_a[0, :] = np.log(overall_means/(1-overall_means)) self._train_loc = True - logger.debug("Using standard initialization for p") - logger.debug("Should train p: %s", self._train_loc) + logging.getLogger("batchglm").debug("Using standard initialization 
for mean") elif init_a.lower() == "all_zero": init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) self._train_loc = True - logger.debug("Using all_zero initialization for p") - logger.debug("Should train p: %s", self._train_loc) + logging.getLogger("batchglm").debug("Using all zero initialization for mean") else: raise ValueError("init_a string %s not recognized" % init_a) - + logging.getLogger("batchglm").debug("Should train mean: %s", self._train_loc) if isinstance(init_b, str): if init_b.lower() == "auto": - init_b = "closed_form" + init_b = "standard" if init_b.lower() == "standard": - groupwise_scales, init_b_intercept, rmsd_b = closedform_beta_glm_logq( + groupwise_scales, init_b_intercept, rmsd_b = closedform_beta_glm_logsamplesize( X=input_data.X, - design_loc=input_data.design_loc[:, [0]], - constraints_loc=input_data.constraints_loc[[0], [0]].values, design_scale=input_data.design_scale[:, [0]], constraints=input_data.constraints_scale[[0], [0]].values, size_factors=size_factors_init, - link_fn=lambda q: np.log(self.np_clip_param(q, "q")) + groupwise_means=None, + link_fn=lambda samplesize: np.log(self.np_clip_param(samplesize, "samplesize")) ) - init_b = np.zeros([input_data.num_loc_params, input_data.num_features]) + init_b = np.zeros([input_data.num_scale_params, input_data.X.shape[1]]) init_b[0, :] = init_b_intercept - self._train_scale = True - logger.debug("Using standard initialization for q") - logger.debug("Should train q: %s", self._train_loc) + logging.getLogger("batchglm").debug("Using standard-form MME initialization for dispersion") elif init_b.lower() == "closed_form": - groupwise_scales, init_b, rmsd_b = closedform_beta_glm_logq( + dmats_unequal = False + if input_data.design_loc.shape[1] == input_data.design_scale.shape[1]: + if np.any(input_data.design_loc.values != input_data.design_scale.values): + dmats_unequal = True + + inits_unequal = False + if init_a_str is not None: + if init_a_str != init_b: + inits_unequal = True + + if inits_unequal or dmats_unequal: + raise ValueError("cannot use closed_form init for scale model " + + "if scale model differs from loc model") + + groupwise_scales, init_b, rmsd_b = closedform_beta_glm_logsamplesize( X=input_data.X, - design_loc=input_data.design_loc, - constraints_loc=input_data.constraints_loc.values, design_scale=input_data.design_scale, constraints=input_data.constraints_scale.values, size_factors=size_factors_init, - link_fn=lambda q: np.log(self.np_clip_param(q, "q")) + groupwise_means=groupwise_means, + link_fn=lambda samplesize: np.log(self.np_clip_param(samplesize, "samplesize")) ) - # train q, if the closed-form solution is inaccurate - self._train_scale = not (np.all(rmsd_b == 0) or rmsd_b.size == 0) - logger.debug("Using closed-form MME initialization for q") - logger.debug("Should train q: %s", self._train_scale) + logging.getLogger("batchglm").debug("Using closed-form MME initialization for dispersion") elif init_b.lower() == "all_zero": init_b = np.zeros([input_data.num_scale_params, input_data.X.shape[1]]) - logger.debug("Using all_zero initialization for q") - logger.debug("Should train r: %s", self._train_scale) + logging.getLogger("batchglm").debug("Using standard initialization for dispersion") else: raise ValueError("init_b string %s not recognized" % init_b) + logging.getLogger("batchglm").debug("Should train r: %s", self._train_scale) else: # Locations model: if isinstance(init_a, str) and (init_a.lower() == "auto" or init_a.lower() == "init_model"): @@ -271,7 +284,7 @@ def 
init_par( init_loc[my_idx] = init_model.a_var[init_idx] init_a = init_loc - logger.debug("Using initialization based on input model for mean") + logging.getLogger("batchglm").debug("Using initialization based on input model for mean") # Scale model: if isinstance(init_b, str) and (init_b.lower() == "auto" or init_b.lower() == "init_model"): @@ -285,7 +298,7 @@ def init_par( init_scale[my_idx] = init_model.b_var[init_idx] init_b = init_scale - logger.debug("Using initialization based on input model for dispersion") + logging.getLogger("batchglm").debug("Using initialization based on input model for dispersion") return init_a, init_b diff --git a/batchglm/train/tf/glm_beta/external.py b/batchglm/train/tf/glm_beta/external.py index 66eed360..78c25640 100644 --- a/batchglm/train/tf/glm_beta/external.py +++ b/batchglm/train/tf/glm_beta/external.py @@ -3,7 +3,7 @@ from batchglm.models.base.input import SparseXArrayDataSet, SparseXArrayDataArray from batchglm.models.glm_beta import AbstractEstimator, EstimatorStoreXArray, InputData, Model from batchglm.models.base_glm.utils import closedform_glm_mean, closedform_glm_scale -from batchglm.models.glm_beta.utils import closedform_beta_glm_logp, closedform_beta_glm_logq +from batchglm.models.glm_beta.utils import closedform_beta_glm_logitmean, closedform_beta_glm_logsamplesize import batchglm.train.tf.ops as op_utils import batchglm.train.tf.train as train_utils diff --git a/batchglm/train/tf/glm_beta/hessians.py b/batchglm/train/tf/glm_beta/hessians.py index e2b71d03..066ebe4f 100644 --- a/batchglm/train/tf/glm_beta/hessians.py +++ b/batchglm/train/tf/glm_beta/hessians.py @@ -9,31 +9,47 @@ class Hessians(HessianGLMALL): - def _weight_hessian_ab( + def _weight_hessian_aa( self, X, loc, scale, ): + one_minus_loc = tf.ones_like(loc) - loc + loc_times_scale = loc * scale + one_minus_loc_times_scale = one_minus_loc * scale scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - const = tf.polygamma(scalar_one, loc + scale) * loc * scale + if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): + const1 = tf.log(tf.sparse.to_dense(X) / -tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype))) + else: + const1 = tf.log(X / (tf.ones_like(X) - X)) + + const2 = (tf.ones_like(loc) - 2 * loc) * (- tf.digamma(loc_times_scale) + tf.digamma(one_minus_loc_times_scale) + const1) + const3 = loc * one_minus_loc_times_scale * (- tf.polygamma(scalar_one, loc_times_scale) - tf.polygamma(scalar_one, one_minus_loc_times_scale)) + const = loc * one_minus_loc_times_scale * (const2 + const3) return const - def _weight_hessian_aa( + def _weight_hessian_ab( self, X, loc, scale, ): + one_minus_loc = tf.ones_like(loc) - loc + loc_times_scale = loc * scale + one_minus_loc_times_scale = one_minus_loc * scale + scalar_one = tf.constant(1, shape=(), dtype=self.dtype) + if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - Xdense = tf.sparse.to_dense(X) + const1 = tf.log(tf.sparse.to_dense(X) / -tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype))) else: - Xdense = X + const1 = tf.log(X / (tf.ones_like(X) - X)) - scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - const = loc * (tf.digamma(loc+scale) - tf.digamma(loc) + tf.log(Xdense) + - loc*(tf.polygamma(scalar_one, loc+scale) - tf.polygamma(scalar_one, loc))) + const2 = - tf.digamma(loc_times_scale) + tf.digamma(one_minus_loc_times_scale) + const1 + const3 = scale * (- tf.polygamma(scalar_one, loc_times_scale) * loc + one_minus_loc * tf.polygamma(scalar_one, 
one_minus_loc_times_scale)) + + const = loc * one_minus_loc_times_scale * (const2 + const3) return const @@ -43,14 +59,24 @@ def _weight_hessian_bb( loc, scale, ): + one_minus_loc = tf.ones_like(loc) - loc + loc_times_scale = loc * scale + one_minus_loc_times_scale = one_minus_loc * scale + scalar_one = tf.constant(1, shape=(), dtype=self.dtype) + if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - one_minus_X = - tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype)) + const1 = tf.log(tf.sparse.to_dense(X) / -tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype))) else: - one_minus_X = tf.ones_like(X) - X + const1 = tf.log(X / (tf.ones_like(X) - X)) - scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - const = scale * (tf.digamma(loc + scale) - tf.digamma(scale) + tf.log(one_minus_X) + - scale * (tf.polygamma(scalar_one, loc+scale) - tf.polygamma(scalar_one, scale))) + const2 = loc * (tf.log(X) - tf.digamma(loc_times_scale))\ + - one_minus_loc * (tf.digamma(one_minus_loc_times_scale) + tf.log(const1)) \ + + tf.digamma(scale) + const3 = scale * (- tf.square(loc) * tf.polygamma(scalar_one, loc_times_scale)\ + + tf.polygamma(scalar_one, scale)\ + - tf.polygamma(scalar_one, one_minus_loc_times_scale) * tf.square(one_minus_loc)) + const = scale * (const2 + const3) return const + diff --git a/batchglm/train/tf/glm_beta/jacobians.py b/batchglm/train/tf/glm_beta/jacobians.py index eae30d72..1eec6172 100644 --- a/batchglm/train/tf/glm_beta/jacobians.py +++ b/batchglm/train/tf/glm_beta/jacobians.py @@ -15,12 +15,13 @@ def _weights_jac_a( loc, scale, ): + one_minus_loc = tf.ones_like(loc) - loc if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - Xdense = tf.sparse.to_dense(X) + const1 = tf.log(tf.sparse.to_dense(X)/-tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype))) else: - Xdense = X - const = (tf.digamma(loc+scale) - tf.digamma(loc) + tf.log(Xdense)) * loc - + const1 = tf.log(X/(tf.ones_like(X)-X)) + const2 = - tf.digamma(loc*scale) + tf.digamma(one_minus_loc*scale) + const1 + const = const2 * scale * loc * one_minus_loc return const def _weights_jac_b( @@ -31,10 +32,11 @@ def _weights_jac_b( ): if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): one_minus_X = - tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype)) + Xdense = tf.sparse.to_dense(X) else: one_minus_X = tf.ones_like(X) - X - - - const = (tf.digamma(loc+scale) - tf.digamma(scale) + tf.log(one_minus_X)) * scale - + Xdense = X + one_minus_loc = tf.ones_like(X) - loc + const = scale * (tf.digamma(scale) - tf.digamma(loc*scale)*loc - tf.digamma(one_minus_loc*scale)*one_minus_loc + + loc * tf.log(Xdense) + one_minus_loc * tf.log(one_minus_X)) return const diff --git a/batchglm/train/tf/glm_beta/model.py b/batchglm/train/tf/glm_beta/model.py index b1bfd416..170cf403 100644 --- a/batchglm/train/tf/glm_beta/model.py +++ b/batchglm/train/tf/glm_beta/model.py @@ -26,24 +26,31 @@ def param_bounds( dmax = np.finfo(dtype).max dtype = dtype.type + zero = np.nextafter(0, np.inf, dtype=dtype) + one = np.nextafter(1, -np.inf, dtype=dtype) + sf = dtype(pkg_constants.ACCURACY_MARGIN_RELATIVE_TO_LIMIT) bounds_min = { - "a_var": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, - "b_var": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, - "eta_loc": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, - "eta_scale": np.log(np.nextafter(0, np.inf, dtype=dtype)) / sf, - "p": np.nextafter(0, np.inf, dtype=dtype), - "q": 
np.nextafter(0, np.inf, dtype=dtype), + #"a_var": np.log(zero/(1-zero)) / sf, + "a_var": dmin, + "b_var": np.log(zero) / sf, + #"eta_loc": np.log(zero/(1-zero)) / sf, + "eta_loc": dmin, + "eta_scale": np.log(zero) / sf, + "mean": zero, + "samplesize": zero, "probs": dtype(0), - "log_probs": np.log(np.nextafter(0, np.inf, dtype=dtype)), + "log_probs": np.log(zero), } bounds_max = { - "a_var": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, + #"a_var": np.log(one/(1-one)) / sf, + "a_var": np.nextafter(np.log(one/(1-one)), -np.inf, dtype=dtype), "b_var": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, - "eta_loc": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, + #"eta_loc": np.log(one/(1-one)) / sf, + "eta_loc": np.nextafter(np.log(one/(1-one)), -np.inf, dtype=dtype), "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, - "p": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, - "q": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, + "mean": one, + "samplesize": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, "probs": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, "log_probs": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, } @@ -88,33 +95,33 @@ def __init__( eta_scale = self.tf_clip_param(eta_scale, "eta_scale") # Inverse linker functions: - model_loc = tf.exp(eta_loc) + model_loc = tf.ones_like(eta_loc)/(tf.ones_like(eta_loc)+tf.exp(-eta_loc)) model_scale = tf.exp(eta_scale) # Log-likelihood: if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): + one_minus_X = -tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=dtype)) Xdense = tf.sparse.to_dense(X) - one_minus_X = - tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype)) else: + one_minus_X = tf.ones_like(X) - X Xdense = X - one_minus_X = tf.ones_like(X)-X - g = tf.lgamma(model_loc+model_scale) - tf.lgamma(model_loc) - tf.lgamma(model_scale) - l = (model_loc - tf.ones_like(model_loc)) * tf.log(Xdense) + (model_scale - tf.ones_like(model_scale)) * tf.log(one_minus_X) - log_probs = g + l + one_minus_loc = tf.ones_like(model_loc) - model_loc + log_probs = tf.lgamma(model_scale) - tf.lgamma(model_loc * model_scale)\ + - tf.lgamma(one_minus_loc * model_scale)\ + + (model_scale * model_loc - tf.ones_like(model_loc)) * tf.log(Xdense)\ + + (one_minus_loc * model_scale - tf.ones_like(model_loc)) * tf.log(one_minus_X) a = tf.print("log_probs: \n", log_probs) - - log_probs = self.tf_clip_param(log_probs, "log_probs") - - b = tf.print("log_probs geclipt: \n", log_probs) - c = tf.print("model_loc: \n", model_loc) - d = tf.print("model_scale: \n", model_scale) - # e = tf.print("first part: \n", h) - # f = tf.print("second part: \n", i) + b = tf.print("model_loc: \n", model_loc) + c = tf.print("model_scale: \n", model_scale) + d = tf.print("X: \n", X) + e = tf.print("a_var: \n", a_var) + f = tf.print("eta_loc: \n", eta_loc) + with tf.control_dependencies([a, b, c, d, e, f]): + log_probs = self.tf_clip_param(log_probs, "log_probs") # Variance: - sigma2 = (model_loc * model_scale) \ - / ((model_loc + model_scale)**2 * (model_loc + model_scale + tf.ones_like(model_scale))) + sigma2 = (model_loc * one_minus_loc) / (tf.ones_like(model_loc) + model_scale) self.X = X self.design_loc = design_loc @@ -130,10 +137,9 @@ def __init__( self.eta_scale = eta_scale self.model_loc = model_loc self.model_scale = model_scale - self.p = model_loc - self.q = model_scale + self.mean = model_loc + self.samplesize = model_scale - with tf.control_dependencies([a,b,c,d]): - self.log_probs = log_probs + 
self.log_probs = log_probs self.sigma2 = sigma2 \ No newline at end of file diff --git a/batchglm/train/tf/glm_beta/reducible_tensors.py b/batchglm/train/tf/glm_beta/reducible_tensors.py index 862ccaf8..a89103ea 100644 --- a/batchglm/train/tf/glm_beta/reducible_tensors.py +++ b/batchglm/train/tf/glm_beta/reducible_tensors.py @@ -5,7 +5,7 @@ from .jacobians import Jacobians from .fim import FIM -logger = logging.getLogger("batchglm") +logger = logging.getLogger(__name__) class ReducibleTensors(Jacobians, Hessians, FIM, ReducableTensorsGLMALL): diff --git a/batchglm/train/tf/glm_beta/training_strategies.py b/batchglm/train/tf/glm_beta/training_strategies.py index d9e57377..9bd8b271 100644 --- a/batchglm/train/tf/glm_beta/training_strategies.py +++ b/batchglm/train/tf/glm_beta/training_strategies.py @@ -1,27 +1,37 @@ from enum import Enum - class TrainingStrategies(Enum): AUTO = None DEFAULT = [ { - "convergence_criteria": "all_converged", + "convergence_criteria": "all_converged_ll", + "stopping_criteria": 1e-8, "use_batching": False, - "optim_algo": "irls_gd_tr", + "optim_algo": "nr_tr", }, ] - IRLS = [ + INEXACT = [ { - "convergence_criteria": "all_converged", + "convergence_criteria": "all_converged_ll", + "stopping_criteria": 1e-6, "use_batching": False, - "optim_algo": "irls_gd_tr", + "optim_algo": "nr_tr", }, ] - IRLS_BATCHED = [ + EXACT = [ { - "convergence_criteria": "all_converged", - "use_batching": True, - "optim_algo": "irls_gd_tr", + "convergence_criteria": "all_converged_ll", + "stopping_criteria": 1e-8, + "use_batching": False, + "optim_algo": "nr_tr", }, ] + IRLS = [ + { + "convergence_criteria": "all_converged_ll", + "stopping_criteria": 1e-8, + "use_batching": False, + "optim_algo": "irls_tr", + }, + ] \ No newline at end of file diff --git a/batchglm/train/tf/glm_beta2/__init__.py b/batchglm/train/tf/glm_beta2/__init__.py deleted file mode 100644 index 4db081bb..00000000 --- a/batchglm/train/tf/glm_beta2/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .estimator import Estimator -from .estimator_graph import EstimatorGraph -from .model import BasicModelGraph, ModelVars, ProcessModel -from .hessians import Hessians -from .fim import FIM -from .jacobians import Jacobians -from .reducible_tensors import ReducibleTensors diff --git a/batchglm/train/tf/glm_beta2/estimator.py b/batchglm/train/tf/glm_beta2/estimator.py deleted file mode 100644 index 668bd421..00000000 --- a/batchglm/train/tf/glm_beta2/estimator.py +++ /dev/null @@ -1,307 +0,0 @@ -import logging -from typing import Union - -import numpy as np -import tensorflow as tf - -from .external import AbstractEstimator, EstimatorAll, ESTIMATOR_PARAMS, InputData, Model -from .external import closedform_beta2_glm_logitmean, closedform_beta2_glm_logsamplesize -from .external import SparseXArrayDataArray -from .estimator_graph import EstimatorGraph -from .model import ProcessModel -from .training_strategies import TrainingStrategies - -logger = logging.getLogger("batchglm") - - -class Estimator(EstimatorAll, AbstractEstimator, ProcessModel): - """ - Estimator for Generalized Linear Models (GLMs) with beta2 distributed noise. - Uses a logit linker function for loc and log linker function for scale. 
- """ - - def __init__( - self, - input_data: InputData, - batch_size: int = 500, - graph: tf.Graph = None, - init_model: Model = None, - init_a: Union[np.ndarray, str] = "AUTO", - init_b: Union[np.ndarray, str] = "AUTO", - quick_scale: bool = False, - model: EstimatorGraph = None, - provide_optimizers: dict = { - "gd": True, - "adam": True, - "adagrad": True, - "rmsprop": True, - "nr": True, - "nr_tr": True, - "irls": False, - "irls_gd": False, - "irls_tr": False, - "irls_gd_tr": False, - }, - provide_batched: bool = False, - provide_fim: bool = False, - provide_hessian: bool = False, - optim_algos: list = [], - extended_summary=False, - dtype="float64" - ): - """ - Performs initialisation and creates a new estimator. - - :param input_data: InputData - The input data - :param batch_size: int - Size of mini-batches used. - :param graph: (optional) tf.Graph - :param init_model: (optional) - If provided, this model will be used to initialize this Estimator. - :param init_a: (Optional) - Low-level initial values for a. Can be: - - - str: - * "auto": automatically choose best initialization - * "random": initialize with random values - * "standard": initialize intercept with observed mean - * "init_model": initialize with another model (see `ìnit_model` parameter) - * "closed_form": try to initialize with closed form - - np.ndarray: direct initialization of 'a' - :param init_b: (Optional) - Low-level initial values for b. Can be: - - - str: - * "auto": automatically choose best initialization - * "random": initialize with random values - * "standard": initialize with zeros - * "init_model": initialize with another model (see `ìnit_model` parameter) - * "closed_form": try to initialize with closed form - - np.ndarray: direct initialization of 'b' - :param quick_scale: bool - Whether `scale` will be fitted faster and maybe less accurate. - Useful in scenarios where fitting the exact `scale` is not absolutely necessary. - :param model: EstimatorGraph - EstimatorGraph to use. Basically for debugging. - :param provide_optimizers: - - E.g. {"gd": False, "adam": False, "adagrad": False, "rmsprop": False, - "nr": False, "nr_tr": True, - "irls": False, "irls_gd": False, "irls_tr": False, "irls_gd_tr": False} - :param provide_batched: bool - Whether mini-batched optimizers should be provided. - :param provide_fim: Whether to compute fisher information matrix during training - Either supply provide_fim and provide_hessian or optim_algos. - :param provide_hessian: Whether to compute hessians during training - Either supply provide_fim and provide_hessian or optim_algos. - :param optim_algos: Algorithms that you want to use on this object. Depending on that, - the hessian and/or fisher information matrix are computed. - Either supply provide_fim and provide_hessian or optim_algos. - :param extended_summary: Include detailed information in the summaries. - Will increase runtime of summary writer, use only for debugging. - :param dtype: Precision used in tensorflow. 
- """ - self.TrainingStrategies = TrainingStrategies - - self._input_data = input_data - self._train_loc = True - self._train_scale = True - - (init_a, init_b) = self.init_par( - input_data=input_data, - init_a=init_a, - init_b=init_b, - init_model=init_model - ) - init_a = init_a.astype(dtype) - init_b = init_b.astype(dtype) - if quick_scale: - self._train_scale = False - - print("init_a") - print(init_a) - print("init_b") - print(init_b) - - if len(optim_algos) > 0: - if np.any([x.lower() in ["nr", "nr_tr"] for x in optim_algos]): - provide_hessian = True - if np.any([x.lower() in ["irls", "irls_tr"] for x in optim_algos]): - provide_fim = True - - EstimatorAll.__init__( - self=self, - input_data=input_data, - batch_size=batch_size, - graph=graph, - init_a=init_a, - init_b=init_b, - model=model, - provide_optimizers=provide_optimizers, - provide_batched=provide_batched, - provide_fim=provide_fim, - provide_hessian=provide_hessian, - extended_summary=extended_summary, - noise_model="beta2", - dtype=dtype - ) - - @classmethod - def param_shapes(cls) -> dict: - return ESTIMATOR_PARAMS - - def init_par( - self, - input_data, - init_a, - init_b, - init_model - ): - r""" - standard: - Only initialise intercept and keep other coefficients as zero. - - closed-form: - Initialize with Maximum Likelihood / Maximum of Momentum estimators - - Idea: - $$ - \theta &= f(x) \\ - \Rightarrow f^{-1}(\theta) &= x \\ - &= (D \cdot D^{+}) \cdot x \\ - &= D \cdot (D^{+} \cdot x) \\ - &= D \cdot x' = f^{-1}(\theta) - $$ - """ - - size_factors_init = input_data.size_factors - - if init_model is None: - groupwise_means = None - init_a_str = None - if isinstance(init_a, str): - init_a_str = init_a.lower() - # Chose option if auto was chosen - if init_a.lower() == "auto": - init_a = "closed_form" - - if init_a.lower() == "closed_form": - groupwise_means, init_a, rmsd_a = closedform_beta2_glm_logitmean( - X=input_data.X, - design_loc=input_data.design_loc, - constraints_loc=input_data.constraints_loc.values, - size_factors=size_factors_init, - link_fn=lambda mean: np.log( - 1/(1/self.np_clip_param(mean, "mean")-1) - ) - ) - - # train mu, if the closed-form solution is inaccurate - self._train_loc = not (np.all(rmsd_a == 0) or rmsd_a.size == 0) - - - logging.getLogger("batchglm").debug("Using closed-form MME initialization for mean") - elif init_a.lower() == "standard": - if isinstance(input_data.X, SparseXArrayDataArray): - overall_means = input_data.X.mean(dim="observations") - else: - overall_means = input_data.X.mean(dim="observations").values # directly calculate the mean - overall_means = self.np_clip_param(overall_means, "mean") - - init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) - init_a[0, :] = np.log(overall_means/(1-overall_means)) - self._train_loc = True - - logging.getLogger("batchglm").debug("Using standard initialization for mean") - elif init_a.lower() == "all_zero": - init_a = np.zeros([input_data.num_loc_params, input_data.num_features]) - self._train_loc = True - - logging.getLogger("batchglm").debug("Using all zero initialization for mean") - else: - raise ValueError("init_a string %s not recognized" % init_a) - logging.getLogger("batchglm").debug("Should train mean: %s", self._train_loc) - if isinstance(init_b, str): - if init_b.lower() == "auto": - init_b = "standard" - - if init_b.lower() == "standard": - groupwise_scales, init_b_intercept, rmsd_b = closedform_beta2_glm_logsamplesize( - X=input_data.X, - design_scale=input_data.design_scale[:, [0]], - 
constraints=input_data.constraints_scale[[0], [0]].values, - size_factors=size_factors_init, - groupwise_means=None, - link_fn=lambda samplesize: np.log(self.np_clip_param(samplesize, "samplesize")) - ) - init_b = np.zeros([input_data.num_scale_params, input_data.X.shape[1]]) - init_b[0, :] = init_b_intercept - - logging.getLogger("batchglm").debug("Using standard-form MME initialization for dispersion") - elif init_b.lower() == "closed_form": - dmats_unequal = False - if input_data.design_loc.shape[1] == input_data.design_scale.shape[1]: - if np.any(input_data.design_loc.values != input_data.design_scale.values): - dmats_unequal = True - - inits_unequal = False - if init_a_str is not None: - if init_a_str != init_b: - inits_unequal = True - - if inits_unequal or dmats_unequal: - raise ValueError("cannot use closed_form init for scale model " + - "if scale model differs from loc model") - - groupwise_scales, init_b, rmsd_b = closedform_beta2_glm_logsamplesize( - X=input_data.X, - design_scale=input_data.design_scale, - constraints=input_data.constraints_scale.values, - size_factors=size_factors_init, - groupwise_means=groupwise_means, - link_fn=lambda samplesize: np.log(self.np_clip_param(samplesize, "samplesize")) - ) - - logging.getLogger("batchglm").debug("Using closed-form MME initialization for dispersion") - elif init_b.lower() == "all_zero": - init_b = np.zeros([input_data.num_scale_params, input_data.X.shape[1]]) - - logging.getLogger("batchglm").debug("Using standard initialization for dispersion") - else: - raise ValueError("init_b string %s not recognized" % init_b) - logging.getLogger("batchglm").debug("Should train r: %s", self._train_scale) - else: - # Locations model: - if isinstance(init_a, str) and (init_a.lower() == "auto" or init_a.lower() == "init_model"): - my_loc_names = set(input_data.loc_names.values) - my_loc_names = my_loc_names.intersection(set(init_model.input_data.loc_names.values)) - - init_loc = np.zeros([input_data.num_loc_params, input_data.num_features]) - for parm in my_loc_names: - init_idx = np.where(init_model.input_data.loc_names == parm)[0] - my_idx = np.where(input_data.loc_names == parm)[0] - init_loc[my_idx] = init_model.a_var[init_idx] - - init_a = init_loc - logging.getLogger("batchglm").debug("Using initialization based on input model for mean") - - # Scale model: - if isinstance(init_b, str) and (init_b.lower() == "auto" or init_b.lower() == "init_model"): - my_scale_names = set(input_data.scale_names.values) - my_scale_names = my_scale_names.intersection(init_model.input_data.scale_names.values) - - init_scale = np.zeros([input_data.num_scale_params, input_data.num_features]) - for parm in my_scale_names: - init_idx = np.where(init_model.input_data.scale_names == parm)[0] - my_idx = np.where(input_data.scale_names == parm)[0] - init_scale[my_idx] = init_model.b_var[init_idx] - - init_b = init_scale - logging.getLogger("batchglm").debug("Using initialization based on input model for dispersion") - - return init_a, init_b - - @property - def input_data(self) -> InputData: - return self._input_data diff --git a/batchglm/train/tf/glm_beta2/estimator_graph.py b/batchglm/train/tf/glm_beta2/estimator_graph.py deleted file mode 100644 index 8e609600..00000000 --- a/batchglm/train/tf/glm_beta2/estimator_graph.py +++ /dev/null @@ -1,12 +0,0 @@ -import logging - -from .model import ProcessModel -from .external import EstimatorGraphAll - -logger = logging.getLogger(__name__) - - -class EstimatorGraph(ProcessModel, EstimatorGraphAll): - """ - Full class. 
- """ diff --git a/batchglm/train/tf/glm_beta2/external.py b/batchglm/train/tf/glm_beta2/external.py deleted file mode 100644 index 54facb78..00000000 --- a/batchglm/train/tf/glm_beta2/external.py +++ /dev/null @@ -1,20 +0,0 @@ -import batchglm.data as data_utils - -from batchglm.models.base.input import SparseXArrayDataSet, SparseXArrayDataArray -from batchglm.models.glm_beta2 import AbstractEstimator, EstimatorStoreXArray, InputData, Model -from batchglm.models.base_glm.utils import closedform_glm_mean, closedform_glm_scale -from batchglm.models.glm_beta2.utils import closedform_beta2_glm_logitmean, closedform_beta2_glm_logsamplesize - -import batchglm.train.tf.ops as op_utils -import batchglm.train.tf.train as train_utils -from batchglm.train.tf.base import TFEstimatorGraph, MonitoredTFEstimator - -from batchglm.train.tf.base_glm import GradientGraphGLM, NewtonGraphGLM, TrainerGraphGLM, EstimatorGraphGLM, FullDataModelGraphGLM, BasicModelGraphGLM -from batchglm.train.tf.base_glm import ESTIMATOR_PARAMS, ProcessModelGLM, ModelVarsGLM -from batchglm.train.tf.base_glm import HessiansGLM, FIMGLM, JacobiansGLM - -from batchglm.train.tf.base_glm_all import EstimatorAll, EstimatorGraphAll, FIMGLMALL, HessianGLMALL, JacobiansGLMALL, ReducableTensorsGLMALL - -import batchglm.utils.random as rand_utils -from batchglm.utils.linalg import groupwise_solve_lm -from batchglm import pkg_constants diff --git a/batchglm/train/tf/glm_beta2/fim.py b/batchglm/train/tf/glm_beta2/fim.py deleted file mode 100644 index 69817fbe..00000000 --- a/batchglm/train/tf/glm_beta2/fim.py +++ /dev/null @@ -1,25 +0,0 @@ -import tensorflow as tf - -import logging - -from .external import FIMGLMALL - -logger = logging.getLogger(__name__) - - -class FIM(FIMGLMALL): - # No Fisher Information Matrices due to unsolvable E[log(X)] - - def _weight_fim_aa( - self, - loc, - scale - ): - assert False, "not implemented" - - def _weight_fim_bb( - self, - loc, - scale - ): - assert False, "not implemented" \ No newline at end of file diff --git a/batchglm/train/tf/glm_beta2/hessians.py b/batchglm/train/tf/glm_beta2/hessians.py deleted file mode 100644 index 066ebe4f..00000000 --- a/batchglm/train/tf/glm_beta2/hessians.py +++ /dev/null @@ -1,82 +0,0 @@ -import tensorflow as tf - -import logging - -from .external import HessianGLMALL - -logger = logging.getLogger(__name__) - - -class Hessians(HessianGLMALL): - - def _weight_hessian_aa( - self, - X, - loc, - scale, - ): - one_minus_loc = tf.ones_like(loc) - loc - loc_times_scale = loc * scale - one_minus_loc_times_scale = one_minus_loc * scale - scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - - if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - const1 = tf.log(tf.sparse.to_dense(X) / -tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype))) - else: - const1 = tf.log(X / (tf.ones_like(X) - X)) - - const2 = (tf.ones_like(loc) - 2 * loc) * (- tf.digamma(loc_times_scale) + tf.digamma(one_minus_loc_times_scale) + const1) - const3 = loc * one_minus_loc_times_scale * (- tf.polygamma(scalar_one, loc_times_scale) - tf.polygamma(scalar_one, one_minus_loc_times_scale)) - const = loc * one_minus_loc_times_scale * (const2 + const3) - return const - - def _weight_hessian_ab( - self, - X, - loc, - scale, - ): - one_minus_loc = tf.ones_like(loc) - loc - loc_times_scale = loc * scale - one_minus_loc_times_scale = one_minus_loc * scale - scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - - if isinstance(X, tf.SparseTensor) or isinstance(X, 
tf.SparseTensorValue): - const1 = tf.log(tf.sparse.to_dense(X) / -tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype))) - else: - const1 = tf.log(X / (tf.ones_like(X) - X)) - - const2 = - tf.digamma(loc_times_scale) + tf.digamma(one_minus_loc_times_scale) + const1 - const3 = scale * (- tf.polygamma(scalar_one, loc_times_scale) * loc + one_minus_loc * tf.polygamma(scalar_one, one_minus_loc_times_scale)) - - const = loc * one_minus_loc_times_scale * (const2 + const3) - - return const - - def _weight_hessian_bb( - self, - X, - loc, - scale, - ): - one_minus_loc = tf.ones_like(loc) - loc - loc_times_scale = loc * scale - one_minus_loc_times_scale = one_minus_loc * scale - scalar_one = tf.constant(1, shape=(), dtype=self.dtype) - - if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - const1 = tf.log(tf.sparse.to_dense(X) / -tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype))) - else: - const1 = tf.log(X / (tf.ones_like(X) - X)) - - const2 = loc * (tf.log(X) - tf.digamma(loc_times_scale))\ - - one_minus_loc * (tf.digamma(one_minus_loc_times_scale) + tf.log(const1)) \ - + tf.digamma(scale) - const3 = scale * (- tf.square(loc) * tf.polygamma(scalar_one, loc_times_scale)\ - + tf.polygamma(scalar_one, scale)\ - - tf.polygamma(scalar_one, one_minus_loc_times_scale) * tf.square(one_minus_loc)) - const = scale * (const2 + const3) - - return const - - diff --git a/batchglm/train/tf/glm_beta2/jacobians.py b/batchglm/train/tf/glm_beta2/jacobians.py deleted file mode 100644 index 1eec6172..00000000 --- a/batchglm/train/tf/glm_beta2/jacobians.py +++ /dev/null @@ -1,42 +0,0 @@ -import logging - -import tensorflow as tf - -from .external import JacobiansGLMALL - -logger = logging.getLogger(__name__) - - -class Jacobians(JacobiansGLMALL): - - def _weights_jac_a( - self, - X, - loc, - scale, - ): - one_minus_loc = tf.ones_like(loc) - loc - if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - const1 = tf.log(tf.sparse.to_dense(X)/-tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype))) - else: - const1 = tf.log(X/(tf.ones_like(X)-X)) - const2 = - tf.digamma(loc*scale) + tf.digamma(one_minus_loc*scale) + const1 - const = const2 * scale * loc * one_minus_loc - return const - - def _weights_jac_b( - self, - X, - loc, - scale, - ): - if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - one_minus_X = - tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype)) - Xdense = tf.sparse.to_dense(X) - else: - one_minus_X = tf.ones_like(X) - X - Xdense = X - one_minus_loc = tf.ones_like(X) - loc - const = scale * (tf.digamma(scale) - tf.digamma(loc*scale)*loc - tf.digamma(one_minus_loc*scale)*one_minus_loc - + loc * tf.log(Xdense) + one_minus_loc * tf.log(one_minus_X)) - return const diff --git a/batchglm/train/tf/glm_beta2/model.py b/batchglm/train/tf/glm_beta2/model.py deleted file mode 100644 index 170cf403..00000000 --- a/batchglm/train/tf/glm_beta2/model.py +++ /dev/null @@ -1,145 +0,0 @@ -import logging - -import tensorflow as tf - -import numpy as np - -from .external import ProcessModelGLM, ModelVarsGLM, BasicModelGraphGLM -from .external import pkg_constants - -logger = logging.getLogger(__name__) - - -class ProcessModel(ProcessModelGLM): - - def param_bounds( - self, - dtype - ): - if isinstance(dtype, tf.DType): - dmin = dtype.min - dmax = dtype.max - dtype = dtype.as_numpy_dtype - else: - dtype = np.dtype(dtype) - dmin = np.finfo(dtype).min - dmax = np.finfo(dtype).max - dtype = dtype.type - - zero = 
np.nextafter(0, np.inf, dtype=dtype) - one = np.nextafter(1, -np.inf, dtype=dtype) - - sf = dtype(pkg_constants.ACCURACY_MARGIN_RELATIVE_TO_LIMIT) - bounds_min = { - #"a_var": np.log(zero/(1-zero)) / sf, - "a_var": dmin, - "b_var": np.log(zero) / sf, - #"eta_loc": np.log(zero/(1-zero)) / sf, - "eta_loc": dmin, - "eta_scale": np.log(zero) / sf, - "mean": zero, - "samplesize": zero, - "probs": dtype(0), - "log_probs": np.log(zero), - } - bounds_max = { - #"a_var": np.log(one/(1-one)) / sf, - "a_var": np.nextafter(np.log(one/(1-one)), -np.inf, dtype=dtype), - "b_var": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, - #"eta_loc": np.log(one/(1-one)) / sf, - "eta_loc": np.nextafter(np.log(one/(1-one)), -np.inf, dtype=dtype), - "eta_scale": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, - "mean": one, - "samplesize": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, - "probs": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, - "log_probs": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, - } - return bounds_min, bounds_max - - -class ModelVars(ProcessModel, ModelVarsGLM): - """ - Full class. - """ - - -class BasicModelGraph(ProcessModel, BasicModelGraphGLM): - - def __init__( - self, - X, - design_loc, - design_scale, - constraints_loc, - constraints_scale, - a_var, - b_var, - dtype, - size_factors=None - ): - a_var = self.tf_clip_param(a_var, "a_var") - b_var = self.tf_clip_param(b_var, "b_var") - - if constraints_loc is not None: - eta_loc = tf.matmul(design_loc, tf.matmul(constraints_loc, a_var)) - else: - eta_loc = tf.matmul(design_loc, a_var) - - eta_loc = self.tf_clip_param(eta_loc, "eta_loc") - - if constraints_scale is not None: - eta_scale = tf.matmul(design_scale, tf.matmul(constraints_scale, b_var)) - else: - eta_scale = tf.matmul(design_scale, b_var) - - eta_scale = self.tf_clip_param(eta_scale, "eta_scale") - - # Inverse linker functions: - model_loc = tf.ones_like(eta_loc)/(tf.ones_like(eta_loc)+tf.exp(-eta_loc)) - model_scale = tf.exp(eta_scale) - - # Log-likelihood: - if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - one_minus_X = -tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=dtype)) - Xdense = tf.sparse.to_dense(X) - else: - one_minus_X = tf.ones_like(X) - X - Xdense = X - - one_minus_loc = tf.ones_like(model_loc) - model_loc - log_probs = tf.lgamma(model_scale) - tf.lgamma(model_loc * model_scale)\ - - tf.lgamma(one_minus_loc * model_scale)\ - + (model_scale * model_loc - tf.ones_like(model_loc)) * tf.log(Xdense)\ - + (one_minus_loc * model_scale - tf.ones_like(model_loc)) * tf.log(one_minus_X) - a = tf.print("log_probs: \n", log_probs) - b = tf.print("model_loc: \n", model_loc) - c = tf.print("model_scale: \n", model_scale) - d = tf.print("X: \n", X) - e = tf.print("a_var: \n", a_var) - f = tf.print("eta_loc: \n", eta_loc) - with tf.control_dependencies([a, b, c, d, e, f]): - log_probs = self.tf_clip_param(log_probs, "log_probs") - - # Variance: - sigma2 = (model_loc * one_minus_loc) / (tf.ones_like(model_loc) + model_scale) - - self.X = X - self.design_loc = design_loc - self.design_scale = design_scale - self.constraints_loc = constraints_loc - self.constraints_scale = constraints_scale - self.a_var = a_var - self.b_var = b_var - self.size_factors = size_factors - self.dtype = dtype - - self.eta_loc = eta_loc - self.eta_scale = eta_scale - self.model_loc = model_loc - self.model_scale = model_scale - self.mean = model_loc - self.samplesize = model_scale - - self.log_probs = log_probs - - self.sigma2 = sigma2 \ No newline at 
end of file diff --git a/batchglm/train/tf/glm_beta2/reducible_tensors.py b/batchglm/train/tf/glm_beta2/reducible_tensors.py deleted file mode 100644 index a89103ea..00000000 --- a/batchglm/train/tf/glm_beta2/reducible_tensors.py +++ /dev/null @@ -1,13 +0,0 @@ -import logging - -from .external import ReducableTensorsGLMALL -from .hessians import Hessians -from .jacobians import Jacobians -from .fim import FIM - -logger = logging.getLogger(__name__) - - -class ReducibleTensors(Jacobians, Hessians, FIM, ReducableTensorsGLMALL): - """ - """ diff --git a/batchglm/train/tf/glm_beta2/training_strategies.py b/batchglm/train/tf/glm_beta2/training_strategies.py deleted file mode 100644 index 9bd8b271..00000000 --- a/batchglm/train/tf/glm_beta2/training_strategies.py +++ /dev/null @@ -1,37 +0,0 @@ -from enum import Enum - -class TrainingStrategies(Enum): - - AUTO = None - DEFAULT = [ - { - "convergence_criteria": "all_converged_ll", - "stopping_criteria": 1e-8, - "use_batching": False, - "optim_algo": "nr_tr", - }, - ] - INEXACT = [ - { - "convergence_criteria": "all_converged_ll", - "stopping_criteria": 1e-6, - "use_batching": False, - "optim_algo": "nr_tr", - }, - ] - EXACT = [ - { - "convergence_criteria": "all_converged_ll", - "stopping_criteria": 1e-8, - "use_batching": False, - "optim_algo": "nr_tr", - }, - ] - IRLS = [ - { - "convergence_criteria": "all_converged_ll", - "stopping_criteria": 1e-8, - "use_batching": False, - "optim_algo": "irls_tr", - }, - ] \ No newline at end of file diff --git a/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py b/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py index 770f0242..562cd521 100644 --- a/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py +++ b/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py @@ -36,8 +36,8 @@ def __init__( from batchglm.api.models.glm_nb import Estimator, InputData elif noise_model=="norm": from batchglm.api.models.glm_norm import Estimator, InputData - elif noise_model=="beta2": - from batchglm.api.models.glm_beta2 import Estimator, InputData + elif noise_model=="beta": + from batchglm.api.models.glm_beta import Estimator, InputData elif noise_model=="beta": from batchglm.api.models.glm_beta import Estimator, InputData elif noise_model=="bern": @@ -99,7 +99,7 @@ def eval_estimation_a( elif self.noise_model=="norm": threshold_dev = 1e-2 threshold_std = 1e-1 - elif self.noise_model=="beta2": + elif self.noise_model=="beta": threshold_dev = 1e-2 threshold_std = 1e-1 elif self.noise_model=="beta": @@ -146,7 +146,7 @@ def eval_estimation_b( elif self.noise_model == "norm": threshold_dev = 1e-2 threshold_std = 1e-1 - elif self.noise_model == "beta2": + elif self.noise_model == "beta": threshold_dev = 1e-2 threshold_std = 1e-1 elif self.noise_model == "beta": @@ -196,8 +196,6 @@ def get_simulator(self): from batchglm.api.models.glm_nb import Simulator elif self.noise_model=="norm": from batchglm.api.models.glm_norm import Simulator - elif self.noise_model=="beta2": - from batchglm.api.models.glm_beta2 import Simulator elif self.noise_model=="beta": from batchglm.api.models.glm_beta import Simulator elif self.noise_model=="bern": @@ -235,14 +233,10 @@ def simulate_complex(self): rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) rand_fn_loc = lambda shape: np.random.uniform(1, 3, shape) rand_fn_scale = lambda shape: np.random.uniform(1, 3, shape) - elif self.noise_model=="beta2": + elif self.noise_model=="beta": rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) rand_fn_loc = 
lambda shape: np.random.uniform(0.35, 0.45, shape) rand_fn_scale = lambda shape: np.random.uniform(10, 30, shape) - elif self.noise_model=="beta": - rand_fn_ave = lambda shape: np.random.uniform(10, 20, shape) - rand_fn_loc = lambda shape: np.random.uniform(10, 20, shape) - rand_fn_scale = lambda shape: np.random.uniform(10, 20, shape) elif self.noise_model=="bern": rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) rand_fn_loc = lambda shape: np.random.uniform(0.35, 0.45, shape) @@ -277,14 +271,10 @@ def rand_fn_standard(shape): rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) rand_fn_loc = lambda shape: np.ones(shape) rand_fn_scale = lambda shape: rand_fn_standard(shape) - elif self.noise_model=="beta2": + elif self.noise_model=="beta": rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) rand_fn_loc = lambda shape: 0.5*np.ones(shape) rand_fn_scale = lambda shape: rand_fn_standard(shape) - elif self.noise_model=="beta": - rand_fn_ave = lambda shape: np.random.uniform(10, 20, shape) - rand_fn_loc = lambda shape: np.random.uniform(10, 20, shape) - rand_fn_scale = lambda shape: rand_fn_standard(shape) elif self.noise_model=="bern": rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) rand_fn_loc = lambda shape: 0.5*np.ones(shape) @@ -387,46 +377,18 @@ def test_a_standard_b_standard(self): self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") -class Test_AccuracyAnalytic_GLM_beta2( - Test_AccuracyAnalytic_GLM_ALL, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for beta2 distributed noise. - """ - - def test_a_closed_b_closed(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("Test_AccuracyAnalytic_GLM_beta2.test_a_closed_b_closed()") - - self.noise_model = "beta2" - self.simulate_complex() - self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") - self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") - - def test_a_standard_b_standard(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("Test_AccuracyAnalytic_GLM_beta2.test_a_standard_b_standard()") - - self.noise_model = "beta2" - self.simulate_easy() - self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") - self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") - -class Test_AccuracyAnalytic_GLM_beta( +class Test_AccuracyAnalytic_GLM_BETA( Test_AccuracyAnalytic_GLM_ALL, unittest.TestCase ): """ - Test whether optimizers yield exact results for beta2 distributed noise. + Test whether optimizers yield exact results for beta distributed noise. 
""" def test_a_closed_b_closed(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("Test_AccuracyAnalytic_GLM_beta.test_a_closed_b_closed()") + logger.error("Test_AccuracyAnalytic_GLM_BETA.test_a_closed_b_closed()") self.noise_model = "beta" self.simulate_complex() @@ -436,7 +398,7 @@ def test_a_closed_b_closed(self): def test_a_standard_b_standard(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("Test_AccuracyAnalytic_GLM_beta.test_a_standard_b_standard()") + logger.error("Test_AccuracyAnalytic_GLM_BETA.test_a_standard_b_standard()") self.noise_model = "beta" self.simulate_easy() diff --git a/batchglm/unit_test/glm_all/test_graph_glm_all.py b/batchglm/unit_test/glm_all/test_graph_glm_all.py index c6d5d9f4..b36327af 100644 --- a/batchglm/unit_test/glm_all/test_graph_glm_all.py +++ b/batchglm/unit_test/glm_all/test_graph_glm_all.py @@ -32,8 +32,6 @@ def __init__( from batchglm.api.models.glm_norm import Estimator, InputData elif noise_model == "beta": from batchglm.api.models.glm_beta import Estimator, InputData - elif noise_model=="beta2": - from batchglm.api.models.glm_beta2 import Estimator, InputData elif noise_model=="bern": from batchglm.api.models.glm_bern import Estimator, InputData else: @@ -105,8 +103,6 @@ def get_simulator(self): from batchglm.api.models.glm_nb import Simulator elif self.noise_model=="norm": from batchglm.api.models.glm_norm import Simulator - elif self.noise_model=="beta2": - from batchglm.api.models.glm_beta2 import Simulator elif self.noise_model=="beta": from batchglm.api.models.glm_beta import Simulator elif self.noise_model=="bern": @@ -202,31 +198,6 @@ def test_batched_norm(self): self._test_batched(sparse=False) self._test_batched(sparse=True) -class Test_Graph_GLM_BETA2( - Test_Graph_GLM_ALL, - unittest.TestCase -): - """ - Test whether training graphs work for beta2 distributed noise. 
- """ - - def test_full_beta2(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.WARNING) - logger.error("Test_Graph_GLM_BETA2.test_full_beta2()") - - self.noise_model = "beta2" - self._test_full(sparse=False) - self._test_full(sparse=True) - - def test_batched_beta2(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.WARNING) - logger.error("Test_Graph_GLM_BETA2.test_batched_beta2()") - - self.noise_model = "beta2" - self._test_batched(sparse=False) - self._test_batched(sparse=True) class Test_Graph_GLM_BETA( Test_Graph_GLM_ALL, diff --git a/batchglm/unit_test/glm_all/test_hessians_glm_all.py b/batchglm/unit_test/glm_all/test_hessians_glm_all.py index 12428c93..2a387e25 100644 --- a/batchglm/unit_test/glm_all/test_hessians_glm_all.py +++ b/batchglm/unit_test/glm_all/test_hessians_glm_all.py @@ -28,8 +28,6 @@ def simulate(self): from batchglm.api.models.glm_nb import Simulator elif self.noise_model == "norm": from batchglm.api.models.glm_norm import Simulator - elif self.noise_model == "beta2": - from batchglm.api.models.glm_beta2 import Simulator elif self.noise_model == "beta": from batchglm.api.models.glm_beta import Simulator elif self.noise_model == "bern": @@ -55,8 +53,6 @@ def get_hessians( from batchglm.api.models.glm_nb import Estimator elif self.noise_model == "norm": from batchglm.api.models.glm_norm import Estimator - elif self.noise_model == "beta2": - from batchglm.api.models.glm_beta2 import Estimator elif self.noise_model == "beta": from batchglm.api.models.glm_beta import Estimator elif self.noise_model == "bern": @@ -100,8 +96,6 @@ def _test_compute_hessians(self, sparse): from batchglm.api.models.glm_nb import Simulator, InputData elif self.noise_model == "norm": from batchglm.api.models.glm_norm import Simulator, InputData - elif self.noise_model == "beta2": - from batchglm.api.models.glm_beta2 import Simulator, InputData elif self.noise_model == "beta": from batchglm.api.models.glm_beta import Simulator, InputData elif self.noise_model == "bern": @@ -192,19 +186,6 @@ def test_compute_hessians_norm(self): return True -class Test_Hessians_GLM_BETA2(Test_Hessians_GLM_ALL, unittest.TestCase): - - def test_compute_hessians_beta2(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.WARNING) - logging.getLogger("batchglm").error("Test_Hessians_GLM_BETA2.test_compute_hessians_beta2()") - - self.noise_model = "beta2" - self._test_compute_hessians(sparse=False) - #self._test_compute_hessians(sparse=False) # TODO tf>=1.13 waiting for tf.sparse.expand_dims to work - - return True - class Test_Hessians_GLM_BETA(Test_Hessians_GLM_ALL, unittest.TestCase): def test_compute_hessians_beta(self): diff --git a/batchglm/unit_test/glm_all/test_jacobians_glm_all.py b/batchglm/unit_test/glm_all/test_jacobians_glm_all.py index a2129fff..22236e3a 100644 --- a/batchglm/unit_test/glm_all/test_jacobians_glm_all.py +++ b/batchglm/unit_test/glm_all/test_jacobians_glm_all.py @@ -28,8 +28,6 @@ def simulate(self): from batchglm.api.models.glm_nb import Simulator elif self.noise_model == "norm": from batchglm.api.models.glm_norm import Simulator - elif self.noise_model == "beta2": - from batchglm.api.models.glm_beta2 import Simulator elif self.noise_model == "beta": from batchglm.api.models.glm_beta import Simulator elif self.noise_model == "bern": @@ -55,8 +53,6 @@ def get_jacs( from batchglm.api.models.glm_nb import 
Estimator elif self.noise_model == "norm": from batchglm.api.models.glm_norm import Estimator - elif self.noise_model == "beta2": - from batchglm.api.models.glm_beta2 import Estimator elif self.noise_model == "beta": from batchglm.api.models.glm_beta import Estimator elif self.noise_model == "bern": @@ -104,8 +100,6 @@ def compare_jacs( from batchglm.api.models.glm_nb import InputData elif self.noise_model == "norm": from batchglm.api.models.glm_norm import InputData - elif self.noise_model == "beta2": - from batchglm.api.models.glm_beta2 import InputData elif self.noise_model == "beta": from batchglm.api.models.glm_beta import InputData elif self.noise_model == "bern": @@ -193,16 +187,6 @@ def test_compute_jacobians_norm(self): self._test_compute_jacobians(sparse=False) #self._test_compute_jacobians(sparse=True) #TODO automatic differentiation does not seem to work here yet. -class Test_Jacobians_GLM_BETA2(Test_Jacobians_GLM_ALL, unittest.TestCase): - - def test_compute_jacobians_beta2(self): - logging.getLogger("tensorflow").setLevel(logging.INFO) - logging.getLogger("batchglm").setLevel(logging.INFO) - logging.getLogger("batchglm").error("Test_Jacobians_GLM_BETA2.test_compute_jacobians_beta2()") - - self.noise_model = "beta2" - self._test_compute_jacobians(sparse=False) - #self._test_compute_jacobians(sparse=True) #TODO automatic differentiation does not seem to work here yet. class Test_Jacobians_GLM_BETA(Test_Jacobians_GLM_ALL, unittest.TestCase): diff --git a/batchglm/utils/random.py b/batchglm/utils/random.py index 7cf7d4b3..f3fcd8ec 100644 --- a/batchglm/utils/random.py +++ b/batchglm/utils/random.py @@ -173,9 +173,9 @@ def sample(self, size=None): return random_data -class Beta2: +class Beta: r""" - beta2 distribution. + beta distribution. """ p: np.ndarray @@ -222,35 +222,4 @@ def sample(self, size=None): p=self.p, size=size ) - return random_data - - -class Beta: - r""" - Negative binomial distribution. - This class supports re-parameterising, sampling and calculation of - probabilities of negative binomial distributed data. - """ - - a: np.ndarray - b: np.ndarray - - def __init__(self, p=None, q=None): - self.a = p - self.b = q - - def sample(self, size=None): - """ - Sample from all distributions data of size `size`. 
- :param size: The size - :return: numpy array containing sampled data - - """ - # numpy uses an alternative parametrization - # see also https://en.wikipedia.org/wiki/Negative_binomial_distribution#Alternative_formulations - random_data = np.random.beta( - a=self.a, - b=self.b, - size=size - ) - return random_data + return random_data \ No newline at end of file From 5c747b1ffdaffdbce147ca73e40d21ac1b16fd4a Mon Sep 17 00:00:00 2001 From: ina258 Date: Tue, 2 Apr 2019 17:03:26 +0200 Subject: [PATCH 12/12] cleaned up --- batchglm/models/glm_bern/simulator.py | 2 +- batchglm/models/glm_beta/simulator.py | 4 +- .../train/tf/base_glm_all/estimator_graph.py | 5 +- batchglm/train/tf/glm_bern/estimator.py | 3 - batchglm/train/tf/glm_beta/estimator.py | 5 - batchglm/train/tf/glm_beta/hessians.py | 10 +- batchglm/train/tf/glm_beta/model.py | 26 +- batchglm/unit_test/base_glm/__init__.py | 1 - .../base_glm/test_acc_analytic_glm.py | 198 -------- batchglm/unit_test/base_glm/test_acc_glm.py | 2 +- batchglm/unit_test/glm_all/external.py | 1 - .../glm_all/test_acc_analytic_glm_all.py | 328 +++++++++++-- .../glm_all/test_acc_analytic_glm_all_2.py | 438 ------------------ .../unit_test/glm_all/test_acc_glm_all.py | 44 +- .../unit_test/glm_all/test_graph_glm_all.py | 4 +- .../glm_all/test_hessians_glm_all.py | 10 +- .../glm_all/test_jacobians_glm_all.py | 7 +- tutorials/glm_norm.ipynb | 408 +++------------- 18 files changed, 439 insertions(+), 1057 deletions(-) delete mode 100644 batchglm/unit_test/base_glm/test_acc_analytic_glm.py delete mode 100644 batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py diff --git a/batchglm/models/glm_bern/simulator.py b/batchglm/models/glm_bern/simulator.py index 2b2c938a..de96456e 100644 --- a/batchglm/models/glm_bern/simulator.py +++ b/batchglm/models/glm_bern/simulator.py @@ -24,7 +24,7 @@ def __init__( def generate_params( self, - rand_fn_ave=lambda shape: np.random.uniform(0.4, 0.4, shape), + rand_fn_ave=lambda shape: np.random.uniform(0.3, 0.4, shape), rand_fn=None, rand_fn_loc=lambda shape: np.random.uniform(0.4, 0.6, shape), rand_fn_scale=lambda shape: np.zeros(shape), diff --git a/batchglm/models/glm_beta/simulator.py b/batchglm/models/glm_beta/simulator.py index 7d9143d7..3afdb0b8 100644 --- a/batchglm/models/glm_beta/simulator.py +++ b/batchglm/models/glm_beta/simulator.py @@ -24,10 +24,10 @@ def __init__( def generate_params( self, - rand_fn_ave=lambda shape: np.random.uniform(0.2, 0.8, shape), + rand_fn_ave=lambda shape: np.random.uniform(0.2, 0.3, shape), rand_fn=None, rand_fn_loc=lambda shape: np.random.uniform(0.5, 0.6, shape), - rand_fn_scale=lambda shape: np.random.uniform(1e1, 2*1e1, shape), + rand_fn_scale=lambda shape: np.random.uniform(1e2, 2e3, shape), ): self._generate_params( self, diff --git a/batchglm/train/tf/base_glm_all/estimator_graph.py b/batchglm/train/tf/base_glm_all/estimator_graph.py index 9b360cd7..20fb9ab9 100644 --- a/batchglm/train/tf/base_glm_all/estimator_graph.py +++ b/batchglm/train/tf/base_glm_all/estimator_graph.py @@ -538,10 +538,7 @@ def __init__( self.hessians = self.full_data_model.hessians_final self.fisher_inv = op_utils.pinv(-self.full_data_model.hessians_final) # TODO switch for fim? 
# Summary statistics on feature-wise model gradients: - a = tf.abs(self.full_data_model.neg_jac_final / num_observations) - b = tf.print("gradients: \n", a) - with tf.control_dependencies([b]): - self.gradients = tf.reduce_sum(a, axis=1) + self.gradients = tf.reduce_sum(tf.abs(self.full_data_model.neg_jac_final / num_observations), axis=1) with tf.name_scope('summaries'): if extended_summary: diff --git a/batchglm/train/tf/glm_bern/estimator.py b/batchglm/train/tf/glm_bern/estimator.py index 57534d34..11dd5a8a 100644 --- a/batchglm/train/tf/glm_bern/estimator.py +++ b/batchglm/train/tf/glm_bern/estimator.py @@ -108,9 +108,6 @@ def __init__( if quick_scale: self._train_scale = False - print("init_a: \n \n", init_a) - print("init_b: \n \n", init_b) - if len(optim_algos) > 0: if np.any([x.lower() in ["nr", "nr_tr"] for x in optim_algos]): provide_hessian = True diff --git a/batchglm/train/tf/glm_beta/estimator.py b/batchglm/train/tf/glm_beta/estimator.py index a927dd50..1f1ca838 100644 --- a/batchglm/train/tf/glm_beta/estimator.py +++ b/batchglm/train/tf/glm_beta/estimator.py @@ -119,11 +119,6 @@ def __init__( if quick_scale: self._train_scale = False - print("init_a") - print(init_a) - print("init_b") - print(init_b) - if len(optim_algos) > 0: if np.any([x.lower() in ["nr", "nr_tr"] for x in optim_algos]): provide_hessian = True diff --git a/batchglm/train/tf/glm_beta/hessians.py b/batchglm/train/tf/glm_beta/hessians.py index 066ebe4f..537a50f1 100644 --- a/batchglm/train/tf/glm_beta/hessians.py +++ b/batchglm/train/tf/glm_beta/hessians.py @@ -65,12 +65,14 @@ def _weight_hessian_bb( scalar_one = tf.constant(1, shape=(), dtype=self.dtype) if isinstance(X, tf.SparseTensor) or isinstance(X, tf.SparseTensorValue): - const1 = tf.log(tf.sparse.to_dense(X) / -tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype))) + one_minus_X = - tf.sparse.add(X, -tf.ones(shape=X.dense_shape, dtype=self.dtype)) + Xdense = tf.sparse.to_dense(X) else: - const1 = tf.log(X / (tf.ones_like(X) - X)) + one_minus_X = tf.ones_like(X) - X + Xdense = X - const2 = loc * (tf.log(X) - tf.digamma(loc_times_scale))\ - - one_minus_loc * (tf.digamma(one_minus_loc_times_scale) + tf.log(const1)) \ + const2 = loc * (tf.log(Xdense) - tf.digamma(loc_times_scale))\ + - one_minus_loc * (tf.digamma(one_minus_loc_times_scale) + tf.log(one_minus_X)) \ + tf.digamma(scale) const3 = scale * (- tf.square(loc) * tf.polygamma(scalar_one, loc_times_scale)\ + tf.polygamma(scalar_one, scale)\ diff --git a/batchglm/train/tf/glm_beta/model.py b/batchglm/train/tf/glm_beta/model.py index 170cf403..04330a85 100644 --- a/batchglm/train/tf/glm_beta/model.py +++ b/batchglm/train/tf/glm_beta/model.py @@ -31,11 +31,9 @@ def param_bounds( sf = dtype(pkg_constants.ACCURACY_MARGIN_RELATIVE_TO_LIMIT) bounds_min = { - #"a_var": np.log(zero/(1-zero)) / sf, - "a_var": dmin, + "a_var": np.log(zero/(1-zero)) / sf, "b_var": np.log(zero) / sf, - #"eta_loc": np.log(zero/(1-zero)) / sf, - "eta_loc": dmin, + "eta_loc": np.log(zero/(1-zero)) / sf, "eta_scale": np.log(zero) / sf, "mean": zero, "samplesize": zero, @@ -43,11 +41,9 @@ def param_bounds( "log_probs": np.log(zero), } bounds_max = { - #"a_var": np.log(one/(1-one)) / sf, - "a_var": np.nextafter(np.log(one/(1-one)), -np.inf, dtype=dtype), + "a_var": np.log(one/(1-one)) / sf, "b_var": np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, - #"eta_loc": np.log(one/(1-one)) / sf, - "eta_loc": np.nextafter(np.log(one/(1-one)), -np.inf, dtype=dtype), + "eta_loc": np.log(one/(1-one)) / sf, "eta_scale": 
np.nextafter(np.log(dmax), -np.inf, dtype=dtype) / sf, "mean": one, "samplesize": np.nextafter(dmax, -np.inf, dtype=dtype) / sf, @@ -107,18 +103,10 @@ def __init__( Xdense = X one_minus_loc = tf.ones_like(model_loc) - model_loc - log_probs = tf.lgamma(model_scale) - tf.lgamma(model_loc * model_scale)\ - - tf.lgamma(one_minus_loc * model_scale)\ - + (model_scale * model_loc - tf.ones_like(model_loc)) * tf.log(Xdense)\ + log_probs = tf.lgamma(model_scale) - tf.lgamma(model_loc * model_scale) - tf.lgamma(one_minus_loc * model_scale) \ + + (model_scale * model_loc - tf.ones_like(model_loc)) * tf.log(Xdense) \ + (one_minus_loc * model_scale - tf.ones_like(model_loc)) * tf.log(one_minus_X) - a = tf.print("log_probs: \n", log_probs) - b = tf.print("model_loc: \n", model_loc) - c = tf.print("model_scale: \n", model_scale) - d = tf.print("X: \n", X) - e = tf.print("a_var: \n", a_var) - f = tf.print("eta_loc: \n", eta_loc) - with tf.control_dependencies([a, b, c, d, e, f]): - log_probs = self.tf_clip_param(log_probs, "log_probs") + log_probs = self.tf_clip_param(log_probs, "log_probs") # Variance: sigma2 = (model_loc * one_minus_loc) / (tf.ones_like(model_loc) + model_scale) diff --git a/batchglm/unit_test/base_glm/__init__.py b/batchglm/unit_test/base_glm/__init__.py index a2ad1f12..e2200222 100644 --- a/batchglm/unit_test/base_glm/__init__.py +++ b/batchglm/unit_test/base_glm/__init__.py @@ -1,5 +1,4 @@ from .test_acc_glm import Test_Accuracy_GLM, _Test_Accuracy_GLM_Estim -from .test_acc_analytic_glm import Test_AccuracyAnalytic_GLM, _Test_AccuracyAnalytic_GLM_Estim from .test_acc_constrained_vglm import Test_AccuracyConstrained_VGLM, _Test_AccuracyConstrained_VGLM_Estim from .test_acc_sizefactors_glm import Test_AccuracySizeFactors_GLM, _Test_AccuracySizeFactors_GLM_Estim from .test_graph_glm import Test_Graph_GLM, _Test_Graph_GLM_Estim diff --git a/batchglm/unit_test/base_glm/test_acc_analytic_glm.py b/batchglm/unit_test/base_glm/test_acc_analytic_glm.py deleted file mode 100644 index b0aba42c..00000000 --- a/batchglm/unit_test/base_glm/test_acc_analytic_glm.py +++ /dev/null @@ -1,198 +0,0 @@ -import abc -import logging -from typing import List -import unittest -import numpy as np - -import batchglm.api as glm -from batchglm.models.base_glm import _Estimator_GLM, _Simulator_GLM - -glm.setup_logging(verbosity="WARNING", stream="STDOUT") -logger = logging.getLogger(__name__) - - -class _Test_AccuracyAnalytic_GLM_Estim(): - - def __init__( - self, - estimator: _Estimator_GLM, - simulator: _Simulator_GLM - ): - self.estimator = estimator - self.sim = simulator - - def estimate(self): - self.estimator.initialize() - self.estimator.train_sequence(training_strategy=[ - { - "learning_rate": 1, - "convergence_criteria": "all_converged_ll", - "stopping_criteria": 1e-6, - "use_batching": False, - "optim_algo": "irls_gd_tr", - }, - ]) - - def eval_estimation_a( - self, - estimator_store, - init - ): - threshold_dev = 1e-2 - threshold_std = 1e-1 - - if init == "standard": - mean_dev = np.mean(estimator_store.a[0, :] - self.sim.a[0, :]) - std_dev = np.std(estimator_store.a[0, :] - self.sim.a[0, :]) - elif init == "closed_form": - mean_dev = np.mean(estimator_store.a - self.sim.a) - std_dev = np.std(estimator_store.a - self.sim.a) - else: - assert False - - logging.getLogger("batchglm").info("mean_dev_a %f" % mean_dev) - logging.getLogger("batchglm").info("std_dev_a %f" % std_dev) - - if np.abs(mean_dev) < threshold_dev and \ - std_dev < threshold_std: - return True - else: - return False - - def 
eval_estimation_b( - self, - estimator_store, - init - ): - threshold_dev = 1e-2 - threshold_std = 12-1 - - if init == "standard": - mean_dev = np.mean(estimator_store.b[0, :] - self.sim.b[0, :]) - std_dev = np.std(estimator_store.b[0, :] - self.sim.b[0, :]) - elif init == "closed_form": - mean_dev = np.mean(estimator_store.b - self.sim.b) - std_dev = np.std(estimator_store.b - self.sim.b) - else: - assert False - - logging.getLogger("batchglm").info("mean_dev_b %f" % mean_dev) - logging.getLogger("batchglm").info("std_dev_b %f" % std_dev) - - if np.abs(mean_dev) < threshold_dev and \ - std_dev < threshold_std: - return True - else: - return False - - -class Test_AccuracyAnalytic_GLM(unittest.TestCase, metaclass=abc.ABCMeta): - """ - Test whether analytic solutions yield exact results. - - Accuracy is evaluted via deviation of simulated ground truth. - The analytic solution is independent of the optimizer and batching - and therefore only tested for one example each. - - - full data model - - train a model only: test_a_analytic() - - train b model only: test_b_analytic() - - The unit tests throw an assertion error if the required accurcy is - not met. - """ - _estims: List[_Test_AccuracyAnalytic_GLM_Estim] - - def setUp(self): - self._estims = [] - - def tearDown(self): - for e in self._estims: - e.estimator.close_session() - - @abc.abstractmethod - def get_simulator(self): - pass - - def simulate_complex(self): - self.sim = self.get_simulator() - self.sim.generate_sample_description(num_batches=1, num_conditions=2) - self.sim.generate_params( - rand_fn_ave=lambda shape: np.random.uniform(1e5, 2*1e5, shape), - rand_fn_loc=lambda shape: np.random.uniform(1, 3, shape), - rand_fn_scale=lambda shape: np.random.uniform(1, 3, shape) - ) - self.sim.generate_data() - - def simulate_a_easy(self): - self.sim = self.get_simulator() - self.sim.generate_sample_description(num_batches=1, num_conditions=2) - - self.sim.generate_params( - rand_fn_ave=lambda shape: np.random.uniform(1e5, 2 * 1e5, shape), - rand_fn_loc=lambda shape: np.ones(shape), - rand_fn_scale=lambda shape: np.random.uniform(5, 20, shape) - ) - self.sim.generate_data() - - def simulate_b_easy(self): - self.sim = self.get_simulator() - self.sim.generate_sample_description(num_batches=1, num_conditions=2) - - def rand_fn_standard(shape): - theta = np.ones(shape) - theta[0, :] = np.random.uniform(5, 20, shape[1]) - return theta - - self.sim.generate_params( - rand_fn_ave=lambda shape: np.random.uniform(1e5, 2 * 1e5, shape), - rand_fn_loc=lambda shape: np.random.uniform(5, 20, shape), - rand_fn_scale=lambda shape: rand_fn_standard(shape) - ) - self.sim.generate_data() - - def simulate_a_b_easy(self): - self.sim = self.get_simulator() - self.sim.generate_sample_description(num_batches=1, num_conditions=2) - - def rand_fn_standard(shape): - theta = np.ones(shape) - theta[0, :] = np.random.uniform(5, 20, shape[1]) - return theta - - self.sim.generate_params( - rand_fn_ave=lambda shape: np.random.uniform(1e5, 2 * 1e5, shape), - rand_fn_loc=lambda shape: np.ones(shape), - rand_fn_scale=lambda shape: rand_fn_standard(shape) - ) - self.sim.generate_data() - - @abc.abstractmethod - def get_estimator(self, train_scale, sparse, init_a, init_b): - pass - - def _test_a_and_b(self, sparse, init_a, init_b): - estimator = self.get_estimator( - train_scale=False, - sparse=sparse, - init_a=init_a, - init_b=init_b - ) - estimator.estimate() - estimator_store = estimator.estimator.finalize() - self._estims.append(estimator) - success = 
estimator.eval_estimation_a( - estimator_store=estimator_store, - init=init_a - ) - assert success, "estimation for a_model was inaccurate" - success = estimator.eval_estimation_b( - estimator_store=estimator_store, - init=init_b - ) - assert success, "estimation for b_model was inaccurate" - return True - - -if __name__ == '__main__': - unittest.main() diff --git a/batchglm/unit_test/base_glm/test_acc_glm.py b/batchglm/unit_test/base_glm/test_acc_glm.py index 5531e396..8c1c4cce 100644 --- a/batchglm/unit_test/base_glm/test_acc_glm.py +++ b/batchglm/unit_test/base_glm/test_acc_glm.py @@ -38,7 +38,7 @@ def estimate( self.estimator.train_sequence(training_strategy=[ { "learning_rate": lr, - "convergence_criteria": "all_converged_ll", + "convergence_criteria": "all_converged", "stopping_criteria": acc, "use_batching": batched, "optim_algo": algo, diff --git a/batchglm/unit_test/glm_all/external.py b/batchglm/unit_test/glm_all/external.py index be8b1be9..3865a988 100644 --- a/batchglm/unit_test/glm_all/external.py +++ b/batchglm/unit_test/glm_all/external.py @@ -1,5 +1,4 @@ from batchglm.unit_test.base_glm import Test_Accuracy_GLM, _Test_Accuracy_GLM_Estim -from batchglm.unit_test.base_glm import Test_AccuracyAnalytic_GLM, _Test_AccuracyAnalytic_GLM_Estim from batchglm.unit_test.base_glm import Test_AccuracyConstrained_VGLM, _Test_AccuracyConstrained_VGLM_Estim from batchglm.unit_test.base_glm import Test_AccuracySizeFactors_GLM, _Test_AccuracySizeFactors_GLM_Estim from batchglm.unit_test.base_glm import Test_Graph_GLM, _Test_Graph_GLM_Estim diff --git a/batchglm/unit_test/glm_all/test_acc_analytic_glm_all.py b/batchglm/unit_test/glm_all/test_acc_analytic_glm_all.py index 0f24a541..562cd521 100644 --- a/batchglm/unit_test/glm_all/test_acc_analytic_glm_all.py +++ b/batchglm/unit_test/glm_all/test_acc_analytic_glm_all.py @@ -1,18 +1,21 @@ +import logging from typing import List import unittest -import logging +import numpy as np import scipy.sparse import batchglm.api as glm -from batchglm.models.base_glm import _Estimator_GLM - -from .external import Test_AccuracyAnalytic_GLM, _Test_AccuracyAnalytic_GLM_Estim +from batchglm.models.base_glm import _Estimator_GLM, _Simulator_GLM glm.setup_logging(verbosity="WARNING", stream="STDOUT") logger = logging.getLogger(__name__) -class _Test_AccuracyAnalytic_GLM_ALL_Estim(_Test_AccuracyAnalytic_GLM_Estim): +class _Test_AccuracyAnalytic_GLM_ALL_Estim(): + + estimator: _Estimator_GLM + simulator: _Simulator_GLM + noise_model: str def __init__( self, @@ -23,6 +26,9 @@ def __init__( init_a, init_b ): + self.simulator = simulator + self.noise_model = noise_model + if noise_model is None: raise ValueError("noise_model is None") else: @@ -30,13 +36,19 @@ def __init__( from batchglm.api.models.glm_nb import Estimator, InputData elif noise_model=="norm": from batchglm.api.models.glm_norm import Estimator, InputData + elif noise_model=="beta": + from batchglm.api.models.glm_beta import Estimator, InputData + elif noise_model=="beta": + from batchglm.api.models.glm_beta import Estimator, InputData + elif noise_model=="bern": + from batchglm.api.models.glm_bern import Estimator, InputData else: raise ValueError("noise_model not recognized") batch_size = 500 provide_optimizers = {"gd": True, "adam": True, "adagrad": True, "rmsprop": True, - "nr": True, "nr_tr": True, - "irls": True, "irls_gd": True, "irls_tr": True, "irls_gd_tr": True} + "nr": False, "nr_tr": False, + "irls": False, "irls_gd": False, "irls_tr": False, "irls_gd_tr": False} if sparse: input_data = 
InputData.new( @@ -51,23 +63,126 @@ def __init__( design_scale=simulator.input_data.design_scale ) - estimator = Estimator( + self.estimator = Estimator( input_data=input_data, batch_size=batch_size, quick_scale=not train_scale, provide_optimizers=provide_optimizers, provide_batched=True, + provide_fim=False, + provide_hessian=False, init_a=init_a, init_b=init_b ) - super().__init__( - estimator=estimator, - simulator=simulator - ) + + def estimate(self): + self.estimator.initialize() + self.estimator.train_sequence(training_strategy=[ + { + "convergence_criteria": "all_converged", + "use_batching": False, + "optim_algo": "gd", + }, + ]) + + def eval_estimation_a( + self, + estimator_store, + init_a, + ): + if self.noise_model is None: + raise ValueError("noise_model is None") + else: + if self.noise_model == "nb": + threshold_dev = 1e-2 + threshold_std = 1e-1 + elif self.noise_model=="norm": + threshold_dev = 1e-2 + threshold_std = 1e-1 + elif self.noise_model=="beta": + threshold_dev = 1e-2 + threshold_std = 1e-1 + elif self.noise_model=="beta": + threshold_dev = 1e-2 + threshold_std = 1e-1 + elif self.noise_model=="bern": + threshold_dev = 1e-1 + threshold_std = 1e-1 + else: + raise ValueError("noise_model not recognized") + + print("estimation: \n", estimator_store.a) + print("simulator: \n", self.simulator.a) + + if init_a == "standard": + mean_dev = np.mean(estimator_store.a[0, :] - self.simulator.a[0, :]) + std_dev = np.std(estimator_store.a[0, :] - self.simulator.a[0, :]) + elif init_a == "closed_form": + mean_dev = np.mean(estimator_store.a - self.simulator.a) + std_dev = np.std(estimator_store.a - self.simulator.a) + else: + assert False + + logging.getLogger("batchglm").info("mean_dev_a %f" % mean_dev) + logging.getLogger("batchglm").info("std_dev_a %f" % std_dev) + + if np.abs(mean_dev) < threshold_dev and \ + std_dev < threshold_std: + return True + else: + return False + + def eval_estimation_b( + self, + estimator_store, + init_b + ): + if self.noise_model is None: + raise ValueError("noise_model is None") + else: + if self.noise_model == "nb": + threshold_dev = 1e-2 + threshold_std = 1e-1 + elif self.noise_model == "norm": + threshold_dev = 1e-2 + threshold_std = 1e-1 + elif self.noise_model == "beta": + threshold_dev = 1e-2 + threshold_std = 1e-1 + elif self.noise_model == "beta": + threshold_dev = 1e-2 + threshold_std = 1e-1 + elif self.noise_model == "bern": + threshold_dev = 1e-2 + threshold_std = 1e-1 + else: + raise ValueError("noise_model not recognized") + + print("estimation: \n", estimator_store.b) + print("simulator: \n", self.simulator.b) + + if init_b == "standard": + mean_dev = np.mean(estimator_store.b[0, :] - self.simulator.b[0, :]) + std_dev = np.std(estimator_store.b[0, :] - self.simulator.b[0, :]) + elif init_b == "closed_form": + mean_dev = np.mean(estimator_store.b - self.simulator.b) + std_dev = np.std(estimator_store.b - self.simulator.b) + else: + assert False + + logging.getLogger("batchglm").info("mean_dev_b %f" % mean_dev) + logging.getLogger("batchglm").info("std_dev_b %f" % std_dev) + + if np.abs(mean_dev) < threshold_dev and \ + std_dev < threshold_std: + return True + else: + return False + + class Test_AccuracyAnalytic_GLM_ALL( - Test_AccuracyAnalytic_GLM, unittest.TestCase ): noise_model: str @@ -81,6 +196,10 @@ def get_simulator(self): from batchglm.api.models.glm_nb import Simulator elif self.noise_model=="norm": from batchglm.api.models.glm_norm import Simulator + elif self.noise_model=="beta": + from batchglm.api.models.glm_beta import 
Simulator + elif self.noise_model=="bern": + from batchglm.api.models.glm_bern import Simulator else: raise ValueError("noise_model not recognized") @@ -99,17 +218,106 @@ def get_estimator(self, train_scale, sparse, init_a, init_b): init_b=init_b ) - def _test_a_closed_b_closed(self, sparse): - self._test_a_and_b(sparse=sparse, init_a="closed_form", init_b="closed_form") + def simulate_complex(self): + self.sim = self.get_simulator() + self.sim.generate_sample_description(num_batches=1, num_conditions=2) + + if self.noise_model is None: + raise ValueError("noise_model is None") + else: + if self.noise_model=="nb": + rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) + rand_fn_loc = lambda shape: np.random.uniform(1, 3, shape) + rand_fn_scale = lambda shape: np.random.uniform(1, 3, shape) + elif self.noise_model=="norm": + rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) + rand_fn_loc = lambda shape: np.random.uniform(1, 3, shape) + rand_fn_scale = lambda shape: np.random.uniform(1, 3, shape) + elif self.noise_model=="beta": + rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) + rand_fn_loc = lambda shape: np.random.uniform(0.35, 0.45, shape) + rand_fn_scale = lambda shape: np.random.uniform(10, 30, shape) + elif self.noise_model=="bern": + rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) + rand_fn_loc = lambda shape: np.random.uniform(0.35, 0.45, shape) + rand_fn_scale = lambda shape: np.random.uniform(0, 0, shape) + else: + raise ValueError("noise_model not recognized") + + self.sim.generate_params( + rand_fn_ave=rand_fn_ave, + rand_fn_loc=rand_fn_loc, + rand_fn_scale=rand_fn_scale + ) + self.sim.generate_data() + + def simulate_easy(self): + self.sim = self.get_simulator() + self.sim.generate_sample_description(num_batches=1, num_conditions=2) + + def rand_fn_standard(shape): + theta = np.ones(shape) + theta[0, :] = np.random.uniform(5, 20, shape[1]) + return theta + + if self.noise_model is None: + raise ValueError("noise_model is None") + else: + if self.noise_model=="nb": + rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) + rand_fn_loc = lambda shape: np.ones(shape) + rand_fn_scale = lambda shape: rand_fn_standard(shape) + elif self.noise_model=="norm": + rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) + rand_fn_loc = lambda shape: np.ones(shape) + rand_fn_scale = lambda shape: rand_fn_standard(shape) + elif self.noise_model=="beta": + rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) + rand_fn_loc = lambda shape: 0.5*np.ones(shape) + rand_fn_scale = lambda shape: rand_fn_standard(shape) + elif self.noise_model=="bern": + rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) + rand_fn_loc = lambda shape: 0.5*np.ones(shape) + rand_fn_scale = lambda shape: np.random.uniform(0, 0, shape) + else: + raise ValueError("noise_model not recognized") + + self.sim.generate_params( + rand_fn_ave=rand_fn_ave, + rand_fn_loc=rand_fn_loc, + rand_fn_scale=rand_fn_scale + ) + self.sim.generate_data() + + def setUp(self): + self._estims = [] - def _test_a_closed_b_standard(self, sparse): - self._test_a_and_b(sparse=sparse, init_a="closed_form", init_b="standard") + def tearDown(self): + for e in self._estims: + e.estimator.close_session() - def _test_a_standard_b_closed(self, sparse): - self._test_a_and_b(sparse=sparse, init_a="standard", init_b="closed_form") + def _test_a_and_b(self, sparse, init_a, init_b): + estimator = self.get_estimator( + train_scale=False, + sparse=sparse, + 
init_a=init_a, + init_b=init_b + ) + estimator.estimate() + estimator_store = estimator.estimator.finalize() + self._estims.append(estimator) + success = estimator.eval_estimation_a( + estimator_store=estimator_store, + init_a=init_a, - def _test_a_standard_b_standard(self, sparse): - self._test_a_and_b(sparse=sparse, init_a="standard", init_b="standard") + ) + assert success, "estimation for a_model was inaccurate" + success = estimator.eval_estimation_b( + estimator_store=estimator_store, + init_b=init_b + ) + assert success, "estimation for b_model was inaccurate" + return True class Test_AccuracyAnalytic_GLM_NB( @@ -127,8 +335,8 @@ def test_a_closed_b_closed(self): self.noise_model = "nb" self.simulate_complex() - self._test_a_closed_b_closed(sparse=False) - self._test_a_closed_b_closed(sparse=True) + self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") + self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") def test_a_standard_b_standard(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) @@ -136,9 +344,9 @@ def test_a_standard_b_standard(self): logger.error("Test_AccuracyAnalytic_GLM_NB.test_a_standard_b_standard()") self.noise_model = "nb" - self.simulate_a_b_easy() - self._test_a_standard_b_standard(sparse=False) - self._test_a_standard_b_standard(sparse=True) + self.simulate_easy() + self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") + self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") class Test_AccuracyAnalytic_GLM_NORM( @@ -150,14 +358,14 @@ class Test_AccuracyAnalytic_GLM_NORM( """ def test_a_closed_b_closed(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR), + logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.INFO) logger.error("Test_AccuracyAnalytic_GLM_NORM.test_a_closed_b_closed()") self.noise_model = "norm" self.simulate_complex() - self._test_a_closed_b_closed(sparse=False) - self._test_a_closed_b_closed(sparse=True) + self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") + self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") def test_a_standard_b_standard(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) @@ -165,9 +373,65 @@ def test_a_standard_b_standard(self): logger.error("Test_AccuracyAnalytic_GLM_NORM.test_a_standard_b_standard()") self.noise_model = "norm" - self.simulate_a_b_easy() - self._test_a_standard_b_standard(sparse=False) - self._test_a_standard_b_standard(sparse=True) + self.simulate_easy() + self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") + self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") + +class Test_AccuracyAnalytic_GLM_BETA( + Test_AccuracyAnalytic_GLM_ALL, + unittest.TestCase +): + """ + Test whether optimizers yield exact results for beta distributed noise. 
+ """ + + def test_a_closed_b_closed(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.INFO) + logger.error("Test_AccuracyAnalytic_GLM_BETA.test_a_closed_b_closed()") + + self.noise_model = "beta" + self.simulate_complex() + self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") + #self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") + + def test_a_standard_b_standard(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.INFO) + logger.error("Test_AccuracyAnalytic_GLM_BETA.test_a_standard_b_standard()") + + self.noise_model = "beta" + self.simulate_easy() + self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") + self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") + +class Test_AccuracyAnalytic_GLM_BERN( + Test_AccuracyAnalytic_GLM_ALL, + unittest.TestCase +): + """ + Test whether optimizers yield exact results for bernoulli distributed noise. + """ + + def test_a_closed_b_closed(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.INFO) + logger.error("Test_AccuracyAnalytic_GLM_BERN.test_a_closed_b_closed()") + + self.noise_model = "bern" + self.simulate_complex() + self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") + #self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") + + def test_a_standard_b_standard(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.INFO) + logger.error("Test_AccuracyAnalytic_GLM_BERN.test_a_standard_b_standard()") + + self.noise_model = "bern" + self.simulate_easy() + self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") + #self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") if __name__ == '__main__': diff --git a/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py b/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py deleted file mode 100644 index 562cd521..00000000 --- a/batchglm/unit_test/glm_all/test_acc_analytic_glm_all_2.py +++ /dev/null @@ -1,438 +0,0 @@ -import logging -from typing import List -import unittest -import numpy as np -import scipy.sparse - -import batchglm.api as glm -from batchglm.models.base_glm import _Estimator_GLM, _Simulator_GLM - -glm.setup_logging(verbosity="WARNING", stream="STDOUT") -logger = logging.getLogger(__name__) - - -class _Test_AccuracyAnalytic_GLM_ALL_Estim(): - - estimator: _Estimator_GLM - simulator: _Simulator_GLM - noise_model: str - - def __init__( - self, - simulator, - train_scale, - noise_model, - sparse, - init_a, - init_b - ): - self.simulator = simulator - self.noise_model = noise_model - - if noise_model is None: - raise ValueError("noise_model is None") - else: - if noise_model == "nb": - from batchglm.api.models.glm_nb import Estimator, InputData - elif noise_model=="norm": - from batchglm.api.models.glm_norm import Estimator, InputData - elif noise_model=="beta": - from batchglm.api.models.glm_beta import Estimator, InputData - elif noise_model=="beta": - from batchglm.api.models.glm_beta import Estimator, InputData - elif noise_model=="bern": - from batchglm.api.models.glm_bern import Estimator, InputData - else: - raise ValueError("noise_model not recognized") - - batch_size = 500 - provide_optimizers = {"gd": True, "adam": True, "adagrad": True, "rmsprop": True, - "nr": False, "nr_tr": False, 
- "irls": False, "irls_gd": False, "irls_tr": False, "irls_gd_tr": False} - - if sparse: - input_data = InputData.new( - data=scipy.sparse.csr_matrix(simulator.input_data.X), - design_loc=simulator.input_data.design_loc, - design_scale=simulator.input_data.design_scale - ) - else: - input_data = InputData.new( - data=simulator.input_data.X, - design_loc=simulator.input_data.design_loc, - design_scale=simulator.input_data.design_scale - ) - - self.estimator = Estimator( - input_data=input_data, - batch_size=batch_size, - quick_scale=not train_scale, - provide_optimizers=provide_optimizers, - provide_batched=True, - provide_fim=False, - provide_hessian=False, - init_a=init_a, - init_b=init_b - ) - - def estimate(self): - self.estimator.initialize() - self.estimator.train_sequence(training_strategy=[ - { - "convergence_criteria": "all_converged", - "use_batching": False, - "optim_algo": "gd", - }, - ]) - - def eval_estimation_a( - self, - estimator_store, - init_a, - ): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model == "nb": - threshold_dev = 1e-2 - threshold_std = 1e-1 - elif self.noise_model=="norm": - threshold_dev = 1e-2 - threshold_std = 1e-1 - elif self.noise_model=="beta": - threshold_dev = 1e-2 - threshold_std = 1e-1 - elif self.noise_model=="beta": - threshold_dev = 1e-2 - threshold_std = 1e-1 - elif self.noise_model=="bern": - threshold_dev = 1e-1 - threshold_std = 1e-1 - else: - raise ValueError("noise_model not recognized") - - print("estimation: \n", estimator_store.a) - print("simulator: \n", self.simulator.a) - - if init_a == "standard": - mean_dev = np.mean(estimator_store.a[0, :] - self.simulator.a[0, :]) - std_dev = np.std(estimator_store.a[0, :] - self.simulator.a[0, :]) - elif init_a == "closed_form": - mean_dev = np.mean(estimator_store.a - self.simulator.a) - std_dev = np.std(estimator_store.a - self.simulator.a) - else: - assert False - - logging.getLogger("batchglm").info("mean_dev_a %f" % mean_dev) - logging.getLogger("batchglm").info("std_dev_a %f" % std_dev) - - if np.abs(mean_dev) < threshold_dev and \ - std_dev < threshold_std: - return True - else: - return False - - def eval_estimation_b( - self, - estimator_store, - init_b - ): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model == "nb": - threshold_dev = 1e-2 - threshold_std = 1e-1 - elif self.noise_model == "norm": - threshold_dev = 1e-2 - threshold_std = 1e-1 - elif self.noise_model == "beta": - threshold_dev = 1e-2 - threshold_std = 1e-1 - elif self.noise_model == "beta": - threshold_dev = 1e-2 - threshold_std = 1e-1 - elif self.noise_model == "bern": - threshold_dev = 1e-2 - threshold_std = 1e-1 - else: - raise ValueError("noise_model not recognized") - - print("estimation: \n", estimator_store.b) - print("simulator: \n", self.simulator.b) - - if init_b == "standard": - mean_dev = np.mean(estimator_store.b[0, :] - self.simulator.b[0, :]) - std_dev = np.std(estimator_store.b[0, :] - self.simulator.b[0, :]) - elif init_b == "closed_form": - mean_dev = np.mean(estimator_store.b - self.simulator.b) - std_dev = np.std(estimator_store.b - self.simulator.b) - else: - assert False - - logging.getLogger("batchglm").info("mean_dev_b %f" % mean_dev) - logging.getLogger("batchglm").info("std_dev_b %f" % std_dev) - - if np.abs(mean_dev) < threshold_dev and \ - std_dev < threshold_std: - return True - else: - return False - - - - -class Test_AccuracyAnalytic_GLM_ALL( - unittest.TestCase -): - noise_model: str - 
_estims: List[_Estimator_GLM] - - def get_simulator(self): - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model=="nb": - from batchglm.api.models.glm_nb import Simulator - elif self.noise_model=="norm": - from batchglm.api.models.glm_norm import Simulator - elif self.noise_model=="beta": - from batchglm.api.models.glm_beta import Simulator - elif self.noise_model=="bern": - from batchglm.api.models.glm_bern import Simulator - else: - raise ValueError("noise_model not recognized") - - return Simulator( - num_observations=10000, - num_features=3 - ) - - def get_estimator(self, train_scale, sparse, init_a, init_b): - return _Test_AccuracyAnalytic_GLM_ALL_Estim( - simulator=self.sim, - train_scale=train_scale, - noise_model=self.noise_model, - sparse=sparse, - init_a=init_a, - init_b=init_b - ) - - def simulate_complex(self): - self.sim = self.get_simulator() - self.sim.generate_sample_description(num_batches=1, num_conditions=2) - - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model=="nb": - rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) - rand_fn_loc = lambda shape: np.random.uniform(1, 3, shape) - rand_fn_scale = lambda shape: np.random.uniform(1, 3, shape) - elif self.noise_model=="norm": - rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) - rand_fn_loc = lambda shape: np.random.uniform(1, 3, shape) - rand_fn_scale = lambda shape: np.random.uniform(1, 3, shape) - elif self.noise_model=="beta": - rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) - rand_fn_loc = lambda shape: np.random.uniform(0.35, 0.45, shape) - rand_fn_scale = lambda shape: np.random.uniform(10, 30, shape) - elif self.noise_model=="bern": - rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) - rand_fn_loc = lambda shape: np.random.uniform(0.35, 0.45, shape) - rand_fn_scale = lambda shape: np.random.uniform(0, 0, shape) - else: - raise ValueError("noise_model not recognized") - - self.sim.generate_params( - rand_fn_ave=rand_fn_ave, - rand_fn_loc=rand_fn_loc, - rand_fn_scale=rand_fn_scale - ) - self.sim.generate_data() - - def simulate_easy(self): - self.sim = self.get_simulator() - self.sim.generate_sample_description(num_batches=1, num_conditions=2) - - def rand_fn_standard(shape): - theta = np.ones(shape) - theta[0, :] = np.random.uniform(5, 20, shape[1]) - return theta - - if self.noise_model is None: - raise ValueError("noise_model is None") - else: - if self.noise_model=="nb": - rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) - rand_fn_loc = lambda shape: np.ones(shape) - rand_fn_scale = lambda shape: rand_fn_standard(shape) - elif self.noise_model=="norm": - rand_fn_ave = lambda shape: np.random.uniform(1e5, 2 * 1e5, shape) - rand_fn_loc = lambda shape: np.ones(shape) - rand_fn_scale = lambda shape: rand_fn_standard(shape) - elif self.noise_model=="beta": - rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) - rand_fn_loc = lambda shape: 0.5*np.ones(shape) - rand_fn_scale = lambda shape: rand_fn_standard(shape) - elif self.noise_model=="bern": - rand_fn_ave = lambda shape: np.random.uniform(0.3, 0.4, shape) - rand_fn_loc = lambda shape: 0.5*np.ones(shape) - rand_fn_scale = lambda shape: np.random.uniform(0, 0, shape) - else: - raise ValueError("noise_model not recognized") - - self.sim.generate_params( - rand_fn_ave=rand_fn_ave, - rand_fn_loc=rand_fn_loc, - rand_fn_scale=rand_fn_scale - ) - self.sim.generate_data() - - def 
setUp(self): - self._estims = [] - - def tearDown(self): - for e in self._estims: - e.estimator.close_session() - - def _test_a_and_b(self, sparse, init_a, init_b): - estimator = self.get_estimator( - train_scale=False, - sparse=sparse, - init_a=init_a, - init_b=init_b - ) - estimator.estimate() - estimator_store = estimator.estimator.finalize() - self._estims.append(estimator) - success = estimator.eval_estimation_a( - estimator_store=estimator_store, - init_a=init_a, - - ) - assert success, "estimation for a_model was inaccurate" - success = estimator.eval_estimation_b( - estimator_store=estimator_store, - init_b=init_b - ) - assert success, "estimation for b_model was inaccurate" - return True - - -class Test_AccuracyAnalytic_GLM_NB( - Test_AccuracyAnalytic_GLM_ALL, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for negative binomial noise. - """ - - def test_a_closed_b_closed(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("Test_AccuracyAnalytic_GLM_NB.test_a_closed_b_closed()") - - self.noise_model = "nb" - self.simulate_complex() - self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") - self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") - - def test_a_standard_b_standard(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("Test_AccuracyAnalytic_GLM_NB.test_a_standard_b_standard()") - - self.noise_model = "nb" - self.simulate_easy() - self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") - self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") - - -class Test_AccuracyAnalytic_GLM_NORM( - Test_AccuracyAnalytic_GLM_ALL, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for normally distributed noise. - """ - - def test_a_closed_b_closed(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("Test_AccuracyAnalytic_GLM_NORM.test_a_closed_b_closed()") - - self.noise_model = "norm" - self.simulate_complex() - self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") - self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") - - def test_a_standard_b_standard(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("Test_AccuracyAnalytic_GLM_NORM.test_a_standard_b_standard()") - - self.noise_model = "norm" - self.simulate_easy() - self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") - self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") - -class Test_AccuracyAnalytic_GLM_BETA( - Test_AccuracyAnalytic_GLM_ALL, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for beta distributed noise. 
- """ - - def test_a_closed_b_closed(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("Test_AccuracyAnalytic_GLM_BETA.test_a_closed_b_closed()") - - self.noise_model = "beta" - self.simulate_complex() - self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") - #self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") - - def test_a_standard_b_standard(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("Test_AccuracyAnalytic_GLM_BETA.test_a_standard_b_standard()") - - self.noise_model = "beta" - self.simulate_easy() - self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") - self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") - -class Test_AccuracyAnalytic_GLM_BERN( - Test_AccuracyAnalytic_GLM_ALL, - unittest.TestCase -): - """ - Test whether optimizers yield exact results for bernoulli distributed noise. - """ - - def test_a_closed_b_closed(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("Test_AccuracyAnalytic_GLM_BERN.test_a_closed_b_closed()") - - self.noise_model = "bern" - self.simulate_complex() - self._test_a_and_b(sparse=False, init_a="closed_form", init_b="closed_form") - #self._test_a_and_b(sparse=True, init_a="closed_form", init_b="closed_form") - - def test_a_standard_b_standard(self): - logging.getLogger("tensorflow").setLevel(logging.ERROR) - logging.getLogger("batchglm").setLevel(logging.INFO) - logger.error("Test_AccuracyAnalytic_GLM_BERN.test_a_standard_b_standard()") - - self.noise_model = "bern" - self.simulate_easy() - self._test_a_and_b(sparse=False, init_a="standard", init_b="standard") - #self._test_a_and_b(sparse=True, init_a="standard", init_b="standard") - - -if __name__ == '__main__': - unittest.main() diff --git a/batchglm/unit_test/glm_all/test_acc_glm_all.py b/batchglm/unit_test/glm_all/test_acc_glm_all.py index cdab93e5..8a76eb1a 100644 --- a/batchglm/unit_test/glm_all/test_acc_glm_all.py +++ b/batchglm/unit_test/glm_all/test_acc_glm_all.py @@ -29,6 +29,8 @@ def __init__( from batchglm.api.models.glm_nb import Estimator, InputData elif noise_model=="norm": from batchglm.api.models.glm_norm import Estimator, InputData + elif noise_model=="beta": + from batchglm.api.models.glm_beta import Estimator, InputData elif noise_model=="bern": from batchglm.api.models.glm_bern import Estimator, InputData else: @@ -59,7 +61,9 @@ def __init__( provide_optimizers=provide_optimizers, provide_batched=True, init_a="standard", - init_b="standard" + init_b="standard", + provide_fim = True, + provide_hessian = True, ) super().__init__( estimator=estimator, @@ -104,6 +108,8 @@ def get_simulator(self): from batchglm.api.models.glm_nb import Simulator elif self.noise_model=="norm": from batchglm.api.models.glm_norm import Simulator + elif self.noise_model=="beta": + from batchglm.api.models.glm_beta import Simulator elif self.noise_model=="bern": from batchglm.api.models.glm_bern import Simulator else: @@ -181,7 +187,7 @@ class Test_Accuracy_GLM_NORM( def test_full_norm(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) - logger.error("Test_Accuracy_GLM_NB.test_full_norm()") + logger.error("Test_Accuracy_GLM_NORM.test_full_norm()") self.noise_model = "norm" self.simulate() @@ -191,16 +197,40 
@@ def test_full_norm(self): def test_batched_norm(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) - logger.error("Test_Accuracy_GLM_NB.test_batched_norm()") + logger.error("Test_Accuracy_GLM_NORM.test_batched_norm()") self.noise_model = "norm" self.simulate() self._test_batched(sparse=False) self._test_batched(sparse=True) +class Test_Accuracy_GLM_BETA( + Test_Accuracy_GLM_ALL, + unittest.TestCase +): + """ + Test whether optimizers yield exact results for negative binomial noise. + """ -if __name__ == '__main__': - unittest.main() + def test_full_beta(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logger.error("Test_Accuracy_GLM_BETA.test_full_beta()") + + self.noise_model = "beta" + self.simulate() + self._test_full(sparse=False) + self._test_full(sparse=True) + + def test_batched_beta(self): + logging.getLogger("tensorflow").setLevel(logging.ERROR) + logging.getLogger("batchglm").setLevel(logging.WARNING) + logger.error("Test_Accuracy_GLM_BETA.test_batched_beta()") + + self.noise_model = "beta" + self.simulate() + self._test_batched(sparse=False) + self._test_batched(sparse=True) class Test_Accuracy_GLM_BERN( @@ -214,7 +244,7 @@ class Test_Accuracy_GLM_BERN( def test_full_bern(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) - logger.error("Test_Accuracy_GLM_NB.test_full_bern()") + logger.error("Test_Accuracy_GLM_BERN.test_full_bern()") self.noise_model = "bern" self.simulate() @@ -224,7 +254,7 @@ def test_full_bern(self): def test_batched_bern(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("batchglm").setLevel(logging.WARNING) - logger.error("Test_Accuracy_GLM_NB.test_batched_bern()") + logger.error("Test_Accuracy_GLM_BERN.test_batched_bern()") self.noise_model = "bern" self.simulate() diff --git a/batchglm/unit_test/glm_all/test_graph_glm_all.py b/batchglm/unit_test/glm_all/test_graph_glm_all.py index b36327af..3c585cbc 100644 --- a/batchglm/unit_test/glm_all/test_graph_glm_all.py +++ b/batchglm/unit_test/glm_all/test_graph_glm_all.py @@ -214,7 +214,7 @@ def test_full_beta(self): self.noise_model = "beta" self._test_full(sparse=False) - #self._test_full(sparse=True) + self._test_full(sparse=True) def test_batched_beta(self): logging.getLogger("tensorflow").setLevel(logging.ERROR) @@ -223,7 +223,7 @@ def test_batched_beta(self): self.noise_model = "beta" self._test_batched(sparse=False) - #self._test_batched(sparse=True) + self._test_batched(sparse=True) class Test_Graph_GLM_BERN( diff --git a/batchglm/unit_test/glm_all/test_hessians_glm_all.py b/batchglm/unit_test/glm_all/test_hessians_glm_all.py index 2a387e25..41ad2297 100644 --- a/batchglm/unit_test/glm_all/test_hessians_glm_all.py +++ b/batchglm/unit_test/glm_all/test_hessians_glm_all.py @@ -112,7 +112,7 @@ def _test_compute_hessians(self, sparse): sample_description = data_utils.sample_description_from_xarray(sim.data, dim="observations") design_loc = data_utils.design_matrix(sample_description, formula="~ 1 + condition + batch") - design_scale = data_utils.design_matrix(sample_description, formula="~ 1 + condition + batch") + design_scale = data_utils.design_matrix(sample_description, formula="~ 1 + condition") if sparse: input_data = InputData.new( @@ -146,10 +146,10 @@ def _test_compute_hessians(self, sparse): logging.getLogger("batchglm").info("MAD: %f" % np.max(np.abs((h_tf - 
h_analytic)))) logging.getLogger("batchglm").info("MRAD: %f" % np.max(np.abs(h_tf - h_analytic))) - i = 1 - print("\n h_tf: \n", h_tf[i, :, :]) - print("\n h_analytic: \n", h_analytic[i, :, :]) - print("\n difference: \n", (h_tf[i, :, :] - h_analytic[i, :, :])) + # i = 1 + # print("\n h_tf: \n", h_tf[i, :, :]) + # print("\n h_analytic: \n", h_analytic[i, :, :]) + # print("\n difference: \n", (h_tf[i, :, :] - h_analytic[i, :, :])) # Make sure that hessians are not all zero which might make evaluation of equality difficult. assert np.sum(np.abs(h_analytic)) > 1e-10, \ diff --git a/batchglm/unit_test/glm_all/test_jacobians_glm_all.py b/batchglm/unit_test/glm_all/test_jacobians_glm_all.py index 22236e3a..1dd668b7 100644 --- a/batchglm/unit_test/glm_all/test_jacobians_glm_all.py +++ b/batchglm/unit_test/glm_all/test_jacobians_glm_all.py @@ -139,8 +139,8 @@ def compare_jacs( t_tf = t1_tf - t0_tf - print("J_analytic: ", J_analytic) - print("J_tf: ", J_tf) + # print("J_analytic: ", J_analytic) + # print("J_tf: ", J_tf) # Make sure that jacobians are not all zero which might make evaluation of equality difficult. assert np.sum(np.abs(J_analytic)) > 1e-10, \ @@ -155,7 +155,8 @@ def compare_jacs( #print(J_analytic) #print((J_tf - J_analytic) / J_tf) - mrad = np.max(np.abs((J_tf - J_analytic) / J_tf)) + # mrad = np.max(np.abs((J_tf - J_analytic) / J_tf)) + mrad = np.max(np.abs(J_tf - J_analytic)) assert mrad < 1e-12, mrad return True diff --git a/tutorials/glm_norm.ipynb b/tutorials/glm_norm.ipynb index be2af931..3b4cca32 100644 --- a/tutorials/glm_norm.ipynb +++ b/tutorials/glm_norm.ipynb @@ -76,19 +76,19 @@ "data": { "text/plain": [ "\n", - "array([[136381.377429, 186625.238809, 161525.713344, ..., 182496.906438,\n", - " 181262.355823, 192805.589245],\n", - " [136497.769208, 186867.304933, 161667.283264, ..., 182699.889726,\n", - " 181355.661108, 192949.718914],\n", - " [136383.256653, 186619.58899 , 161522.772444, ..., 182499.34418 ,\n", - " 181243.055505, 192805.625307],\n", + "array([[153530.668053, 184471.534877, 169294.048184, ..., 124758.218499,\n", + " 163299.072128, 168342.198344],\n", + " [153727.148687, 184660.252114, 169413.785819, ..., 124985.662782,\n", + " 163416.764229, 168525.694527],\n", + " [153524.265736, 184464.054546, 169294.235636, ..., 124763.473351,\n", + " 163296.463545, 168330.185236],\n", " ...,\n", - " [137092.403002, 186639.049 , 161944.547794, ..., 182873.36988 ,\n", - " 181420.200523, 193160.770375],\n", - " [136579.709776, 186803.033004, 161839.651223, ..., 182703.690275,\n", - " 181415.959071, 192991.729134],\n", - " [137088.506608, 186717.445438, 161547.221266, ..., 182701.225484,\n", - " 181945.100631, 193082.734682]])\n", + " [153823.158262, 185136.831761, 169525.750169, ..., 124938.104856,\n", + " 163703.131904, 168728.535786],\n", + " [153620.064816, 184690.47005 , 169416.598402, ..., 124924.478733,\n", + " 163443.696657, 168461.014046],\n", + " [153870.35622 , 184581.061892, 169478.678999, ..., 125108.260219,\n", + " 163420.133359, 168725.511263]])\n", "Dimensions without coordinates: observations, features" ] }, @@ -110,19 +110,19 @@ "data": { "text/plain": [ "\n", - "array([[ 7.081342, 7.679191, 9.989066, ..., 3.006988, 7.153044,\n", - " 4.354511],\n", - " [ 60.935395, 58.548347, 16.420187, ..., 17.435734, 49.276409,\n", - " 8.850265],\n", - " [ 7.081342, 7.679191, 9.989066, ..., 3.006988, 7.153044,\n", - " 4.354511],\n", + "array([[ 3.75385 , 8.645385, 7.135442, ..., 8.487027, 3.7549 ,\n", + " 6.458461],\n", + " [ 10.768345, 33.85133 , 36.596781, ..., 
82.445376, 29.838425,\n", + " 9.863909],\n", + " [ 3.75385 , 8.645385, 7.135442, ..., 8.487027, 3.7549 ,\n", + " 6.458461],\n", " ...,\n", - " [583.350617, 234.053087, 118.719318, ..., 158.027114, 400.083058,\n", - " 45.818811],\n", - " [ 67.791559, 30.698362, 72.221781, ..., 27.253545, 58.076708,\n", - " 22.543787],\n", - " [583.350617, 234.053087, 118.719318, ..., 158.027114, 400.083058,\n", - " 45.818811]])\n", + " [ 35.960157, 246.471101, 90.036937, ..., 130.847114, 155.863109,\n", + " 59.458407],\n", + " [ 12.535728, 62.946941, 17.554913, ..., 13.469561, 19.613986,\n", + " 38.930793],\n", + " [ 35.960157, 246.471101, 90.036937, ..., 130.847114, 155.863109,\n", + " 59.458407]])\n", "Dimensions without coordinates: observations, features" ] }, @@ -144,16 +144,16 @@ "data": { "text/plain": [ "\n", - "array([[1.363856e+05, 1.866230e+05, 1.615200e+05, ..., 1.825047e+05,\n", - " 1.812539e+05, 1.928044e+05],\n", - " [1.504359e+02, 1.926565e+02, 1.503140e+02, ..., 1.998073e+02,\n", - " 1.363696e+02, 1.546470e+02],\n", - " [1.771764e+02, 1.630434e+02, 1.008328e+02, ..., 1.960234e+02,\n", - " 1.361614e+02, 1.567535e+02],\n", - " [1.909409e+02, 1.566499e+02, 1.457898e+02, ..., 1.685650e+02,\n", - " 1.806504e+02, 1.750800e+02],\n", - " [1.820867e+02, 1.572823e+02, 1.671585e+02, ..., 1.501432e+02,\n", - " 1.783009e+02, 1.863868e+02]])\n", + "array([[1.535276e+05, 1.844713e+05, 1.692924e+05, ..., 1.247599e+05,\n", + " 1.632980e+05, 1.683406e+05],\n", + " [1.941580e+02, 1.618886e+02, 1.078499e+02, ..., 1.376593e+02,\n", + " 1.037169e+02, 1.901242e+02],\n", + " [1.113831e+02, 1.690494e+02, 1.244814e+02, ..., 1.450850e+02,\n", + " 1.374363e+02, 1.623521e+02],\n", + " [1.941130e+02, 1.235625e+02, 1.801599e+02, ..., 1.154509e+02,\n", + " 1.729885e+02, 1.536259e+02],\n", + " [1.067593e+02, 1.453621e+02, 1.029423e+02, ..., 1.707639e+02,\n", + " 1.541521e+02, 1.270179e+02]])\n", "Coordinates:\n", " * design_loc_params (design_loc_params) object 'Intercept' ... 'batch[T.3]'\n", "Dimensions without coordinates: features" @@ -177,11 +177,11 @@ "data": { "text/plain": [ "\n", - "array([[1.957463, 2.038514, 2.301491, ..., 1.100939, 1.967538, 1.471212],\n", - " [2.152351, 2.031339, 0.49702 , ..., 1.757583, 1.929907, 0.709235],\n", - " [2.210487, 2.02778 , 0.979552, ..., 1.617446, 0.610368, 2.297003],\n", - " [1.15282 , 2.089364, 2.280364, ..., 1.595779, 2.034945, 1.737918],\n", - " [2.258974, 1.385695, 1.978251, ..., 2.204245, 2.094227, 1.644247]])\n", + "array([[1.322782, 2.157026, 1.965074, ..., 2.138539, 1.323062, 1.865391],\n", + " [1.053829, 1.364953, 1.634886, ..., 2.273597, 2.072735, 0.423491],\n", + " [2.103463, 2.022464, 1.513653, ..., 0.87566 , 2.288657, 1.114935],\n", + " [2.225734, 0.831343, 2.051622, ..., 0.626995, 1.506541, 1.975494],\n", + " [1.205801, 1.985266, 0.90026 , ..., 0.461894, 1.653181, 1.796394]])\n", "Coordinates:\n", " * design_scale_params (design_scale_params) object 'Intercept' ... 
'batch[T.3]'\n", "Dimensions without coordinates: features" @@ -219,7 +219,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -227,14 +227,14 @@ " init_a = \"standard\", init_b = \"standard\", \n", " provide_optimizers = {\n", " \"gd\": True, \"adam\": True, \"adagrad\": True, \"rmsprop\": True,\n", - " \"nr\": True, \"nr_tr\": True,\n", - " \"irls\": True, \"irls_gd\": True, \"irls_tr\": True, \"irls_gd_tr\": True,\n", - " })" + " \"nr\": False, \"nr_tr\": False,\n", + " \"irls\": False, \"irls_gd\": False, \"irls_tr\": False, \"irls_gd_tr\": False,\n", + " }, provide_hessian = False)" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -253,27 +253,15 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "INFO:tensorflow:Step: 0 loss: 650.374732 models converged 0\n", - "INFO:tensorflow:Step: 1 loss: 650.209592, converged 83 in 0.645 sec., updated 31, {f: 0, g: 83, x: 0}\n", - "INFO:tensorflow:Step: 2 loss: 650.206823, converged 83 in 0.192 sec., updated 1, {f: 0, g: 0, x: 0}\n", - "INFO:tensorflow:Step: 3 loss: 650.198667, converged 83 in 0.199 sec., updated 1, {f: 0, g: 0, x: 0}\n", - "INFO:tensorflow:Step: 4 loss: 650.182757, converged 83 in 0.203 sec., updated 1, {f: 0, g: 0, x: 0}\n", - "INFO:tensorflow:Step: 5 loss: 650.182757, converged 83 in 0.194 sec., updated 0, {f: 0, g: 0, x: 0}\n", - "INFO:tensorflow:Step: 6 loss: 650.182757, converged 83 in 0.204 sec., updated 0, {f: 0, g: 0, x: 0}\n", - "INFO:tensorflow:Step: 7 loss: 650.182757, converged 83 in 0.2 sec., updated 0, {f: 0, g: 0, x: 0}\n", - "INFO:tensorflow:Step: 8 loss: 650.182757, converged 98 in 0.201 sec., updated 0, {f: 0, g: 0, x: 15}\n", - "INFO:tensorflow:Step: 9 loss: 650.182757, converged 99 in 0.202 sec., updated 0, {f: 0, g: 0, x: 1}\n", - "INFO:tensorflow:Step: 10 loss: 650.182757, converged 99 in 0.194 sec., updated 0, {f: 0, g: 0, x: 0}\n", - "INFO:tensorflow:Step: 11 loss: 650.182757, converged 99 in 0.194 sec., updated 0, {f: 0, g: 0, x: 0}\n", - "INFO:tensorflow:Step: 12 loss: 650.182757, converged 99 in 0.19 sec., updated 0, {f: 0, g: 0, x: 0}\n", - "INFO:tensorflow:Step: 13 loss: 650.182757, converged 100 in 0.193 sec., updated 0, {f: 0, g: 0, x: 1}\n" + "INFO:tensorflow:Step: 0 loss: 615.682423 models converged 0\n", + "INFO:tensorflow:Step: 1 loss: 615.682423, converged 100 in 0.227 sec., updated 100, {f: 100, g: 100, x: 100}\n" ] } ], @@ -281,17 +269,17 @@ "estimator.train_sequence(training_strategy=[\n", " {\n", " \"learning_rate\": 1,\n", - " \"convergence_criteria\": \"all_converged_ll\",\n", + " \"convergence_criteria\": \"all_converged\",\n", " \"stopping_criteria\": 1e-6,\n", " \"use_batching\": False,\n", - " \"optim_algo\": \"nr_tr\",\n", + " \"optim_algo\": \"gd\",\n", " },\n", " ])" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -300,320 +288,78 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "\n", - "array(-84.127032)" + "\n", + "array([[ 2.079207e-02, 5.072105e+00, 5.357422e+00, ..., 7.132302e+00,\n", + " 3.804407e+00, 5.184096e-01],\n", + " [-3.651440e-01, -9.410049e+00, -1.232361e+01, ..., 3.187043e+00,\n", + " -8.003614e+00, -6.045565e-01],\n", + " [-1.111495e+01, 6.376660e+00, 
4.579032e+00, ..., -6.709791e+00,\n", + " -8.973671e+00, 2.515338e+00],\n", + " [ 5.207923e+00, -3.547651e+00, -2.312268e+01, ..., -1.051617e+01,\n", + " -3.800159e+00, 1.252344e+00],\n", + " [ 4.002172e+00, -9.691013e+00, -8.613539e-01, ..., -1.567684e+01,\n", + " 2.253166e-01, -5.545119e+00]])\n", + "Coordinates:\n", + " * design_loc_params (design_loc_params) object 'Intercept' ... 'batch[T.3]'\n", + "Dimensions without coordinates: features" ] }, - "execution_count": 14, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "np.mean(store.a - sim.a)" + "store.a - sim.a" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\n", - "array(-0.637152)" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "np.mean(store.b - sim.b)" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[ 1.36600688e+05, 1.86841946e+05, 1.61697770e+05,\n", - " 1.61222009e+05, 1.60421263e+05, 1.25578231e+05,\n", - " 1.88510287e+05, 1.29307146e+05, 1.51278544e+05,\n", - " 1.71637615e+05, 1.77025928e+05, 1.72080625e+05,\n", - " 1.69513558e+05, 1.30717017e+05, 1.42971344e+05,\n", - " 1.28180470e+05, 1.24354358e+05, 1.83470673e+05,\n", - " 1.60164500e+05, 1.39658339e+05, 1.58726546e+05,\n", - " 1.30253608e+05, 1.77426758e+05, 1.64664323e+05,\n", - " 1.07705236e+05, 1.18145893e+05, 1.38891881e+05,\n", - " 1.71310584e+05, 1.63992087e+05, 1.83785895e+05,\n", - " 1.11725452e+05, 1.02048812e+05, 1.17864771e+05,\n", - " 1.60429832e+05, 1.73236891e+05, 1.18189379e+05,\n", - " 1.24350625e+05, 1.83446020e+05, 1.29091481e+05,\n", - " 1.72074305e+05, 1.92275543e+05, 1.36370243e+05,\n", - " 1.38289082e+05, 1.60284886e+05, 1.99862241e+05,\n", - " 1.81011155e+05, 1.63575778e+05, 1.98979039e+05,\n", - " 1.57633199e+05, 2.00185428e+05, 1.65807483e+05,\n", - " 1.55876087e+05, 1.12052748e+05, 1.34068809e+05,\n", - " 1.03614147e+05, 1.26435009e+05, 1.58195954e+05,\n", - " 1.57939242e+05, 1.94262637e+05, 1.72595280e+05,\n", - " 1.14816606e+05, 1.90048906e+05, 1.11892800e+05,\n", - " 1.85515427e+05, 1.79462275e+05, 1.61840581e+05,\n", - " 1.38948016e+05, 1.26029069e+05, 1.89826897e+05,\n", - " 1.95896314e+05, 1.83772458e+05, 1.40717995e+05,\n", - " 1.27780375e+05, 1.92972427e+05, 1.84697090e+05,\n", - " 1.66191026e+05, 1.29493831e+05, 1.24717599e+05,\n", - " 1.93200966e+05, 1.67277191e+05, 1.40858855e+05,\n", - " 1.35554049e+05, 1.58251038e+05, 1.11735423e+05,\n", - " 1.74338593e+05, 1.68898537e+05, 1.61979892e+05,\n", - " 1.54009922e+05, 1.73006998e+05, 1.27857914e+05,\n", - " 1.23804970e+05, 1.40832963e+05, 1.33008294e+05,\n", - " 1.90844112e+05, 1.36434406e+05, 1.27883891e+05,\n", - " 1.32452803e+05, 1.82732615e+05, 1.81457380e+05,\n", - " 1.93013253e+05],\n", - " [ 1.54445953e+00, 3.05909575e-01, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 4.46565150e-01,\n", - " 0.00000000e+00, 0.00000000e+00, 1.92235758e+00,\n", - " 0.00000000e+00, 2.46363918e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 2.71697086e+00,\n", - " 1.95154632e+00, 0.00000000e+00, 8.47302097e-01,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 
0.00000000e+00, 1.39559323e+00,\n", - " 0.00000000e+00, 1.05701263e-01, 0.00000000e+00,\n", - " 1.34281794e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 4.77700368e-01, 0.00000000e+00,\n", - " 1.64828089e+00, 1.62463681e+00, 0.00000000e+00,\n", - " 1.63319749e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 9.86431300e-01, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 2.84312371e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 2.62745775e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 1.56232470e-01, 2.69528706e+00,\n", - " 0.00000000e+00, 2.53867612e+00, 2.33622989e+00,\n", - " 1.01078263e+00, 0.00000000e+00, 2.57767978e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 1.37585627e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 2.05949901e+00,\n", - " 1.58620477e+01, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 3.64251741e+00,\n", - " 1.72734831e+00, 4.51230141e-01, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 2.76653257e+00,\n", - " 0.00000000e+00],\n", - " [-2.32729104e+00, -2.05367096e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -2.27231177e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -2.83565458e+00,\n", - " 0.00000000e+00, -2.48867187e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -2.25276182e+00,\n", - " -2.64775144e+00, 0.00000000e+00, -2.46202546e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -1.61304753e+00,\n", - " 0.00000000e+00, -1.31351294e+00, 0.00000000e+00,\n", - " -1.73279553e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, -1.72656755e+00, 0.00000000e+00,\n", - " -2.41455405e+00, -2.28191447e+00, 0.00000000e+00,\n", - " -2.96460316e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, -1.20887626e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " -1.14341336e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, -2.99430719e+00, 0.00000000e+00,\n", - " 0.00000000e+00, -2.21392025e+00, -1.48195464e+00,\n", - " 0.00000000e+00, -1.92268014e-01, -1.22094320e+00,\n", - " -2.50058692e+00, 0.00000000e+00, -6.83935374e-01,\n", - " 0.00000000e+00, 0.00000000e+00, -1.50694282e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -2.38320697e+00,\n", - " -1.28503616e+01, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -6.34130746e-01,\n", - " -1.28115927e+00, -9.90750532e-01, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -2.25076041e+00,\n", - " 0.00000000e+00],\n", - " [-1.67364705e+00, -2.20747216e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -2.16074149e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -1.15348090e+00,\n", - " 0.00000000e+00, -7.75272089e-01, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 
0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -5.01629634e-01,\n", - " -1.65291138e+00, 0.00000000e+00, -1.83672505e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -2.47599285e+00,\n", - " 0.00000000e+00, -2.49183441e+00, 0.00000000e+00,\n", - " -2.25441905e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, -2.17708091e+00, 0.00000000e+00,\n", - " -1.70461297e+00, -1.93925454e+00, 0.00000000e+00,\n", - " -1.21010116e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, -2.31758032e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " -9.40813337e-01, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, -4.27078030e-02, 0.00000000e+00,\n", - " 0.00000000e+00, -2.03240035e+00, -1.59750233e+00,\n", - " 0.00000000e+00, -1.13952258e+00, -2.93926199e+00,\n", - " -1.41135606e+00, 0.00000000e+00, -2.82863812e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -2.07676013e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -1.50577772e+00,\n", - " -2.32829294e+01, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -1.02268440e-01,\n", - " -1.96934735e+00, -2.25841446e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -1.20595706e+00,\n", - " 0.00000000e+00],\n", - " [-2.10765358e+00, -2.14294402e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -1.94081618e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -1.65920182e+00,\n", - " 0.00000000e+00, -1.74490575e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -1.81008591e+00,\n", - " -1.38704494e+00, 0.00000000e+00, -1.98391449e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -2.16010059e+00,\n", - " 0.00000000e+00, -1.96127914e+00, 0.00000000e+00,\n", - " -2.24185834e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, -2.53916699e+00, 0.00000000e+00,\n", - " -1.91495004e+00, -2.03812612e+00, 0.00000000e+00,\n", - " -1.68412802e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, -2.59627712e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " -2.39003182e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 1.62007343e-01, 0.00000000e+00,\n", - " 0.00000000e+00, -1.98591181e+00, -1.94062697e+00,\n", - " 0.00000000e+00, -2.86622331e+00, -6.20792981e-01,\n", - " -2.17767640e+00, 0.00000000e+00, 8.25520670e-01,\n", - " 0.00000000e+00, 0.00000000e+00, -2.56718444e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -1.76884641e+00,\n", - " -3.03975291e+01, 0.00000000e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, 6.97524704e-02,\n", - " -2.46433494e+00, -2.25680107e+00, 0.00000000e+00,\n", - " 0.00000000e+00, 0.00000000e+00, -1.19071469e+00,\n", - " 0.00000000e+00]])" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], 
+ "outputs": [], "source": [ "store.a" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\n", - "array([[1.363856e+05, 1.866230e+05, 1.615200e+05, ..., 1.825047e+05,\n", - " 1.812539e+05, 1.928044e+05],\n", - " [1.504359e+02, 1.926565e+02, 1.503140e+02, ..., 1.998073e+02,\n", - " 1.363696e+02, 1.546470e+02],\n", - " [1.771764e+02, 1.630434e+02, 1.008328e+02, ..., 1.960234e+02,\n", - " 1.361614e+02, 1.567535e+02],\n", - " [1.909409e+02, 1.566499e+02, 1.457898e+02, ..., 1.685650e+02,\n", - " 1.806504e+02, 1.750800e+02],\n", - " [1.820867e+02, 1.572823e+02, 1.671585e+02, ..., 1.501432e+02,\n", - " 1.783009e+02, 1.863868e+02]])\n", - "Coordinates:\n", - " * design_loc_params (design_loc_params) object 'Intercept' ... 'batch[T.3]'\n", - "Dimensions without coordinates: features" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sim.a" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\n", - "array([[ 3.819261, 3.583939, 2.574651, ..., 3.879411, 3.496541, 3.304978],\n", - " [-2.134151, -2.023047, -0.49702 , ..., -1.757583, -1.907441, -0.709235],\n", - " [-2.230739, -2.041629, -0.979552, ..., -1.617446, -0.634581, -2.297003],\n", - " [-1.176945, -2.103288, -2.280364, ..., -1.595779, -2.055217, -1.737918],\n", - " [-2.279375, -1.399903, -1.978251, ..., -2.204245, -2.113975, -1.644247]])\n", - "Coordinates:\n", - " * design_scale_params (design_scale_params) object 'Intercept' ... 'batch[T.3]'\n", - "Dimensions without coordinates: features" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "store.b - sim.b" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\n", - "array([[1.957463, 2.038514, 2.301491, ..., 1.100939, 1.967538, 1.471212],\n", - " [2.152351, 2.031339, 0.49702 , ..., 1.757583, 1.929907, 0.709235],\n", - " [2.210487, 2.02778 , 0.979552, ..., 1.617446, 0.610368, 2.297003],\n", - " [1.15282 , 2.089364, 2.280364, ..., 1.595779, 2.034945, 1.737918],\n", - " [2.258974, 1.385695, 1.978251, ..., 2.204245, 2.094227, 1.644247]])\n", - "Coordinates:\n", - " * design_scale_params (design_scale_params) object 'Intercept' ... 'batch[T.3]'\n", - "Dimensions without coordinates: features" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sim.b" ]