theislab · SabrinaRichter · Mar 23, 2019 · Mar 25, 2019 · Mar 25, 2019 · Mar 26, 2019
diff --git a/batchglm/api/models/__init__.py b/batchglm/api/models/__init__.py
@@ -1,3 +1,4 @@
 from . import glm_nb
 from . import glm_norm
 from . import glm_beta
+from . import glm_bern
diff --git a/batchglm/api/models/glm_bern.py b/batchglm/api/models/glm_bern.py
@@ -0,0 +1,2 @@
+from batchglm.models.glm_bern import InputData, Model, Simulator
+from batchglm.train.tf.glm_bern import Estimator
diff --git a/batchglm/api/utils/random.py b/batchglm/api/utils/random.py
@@ -1 +1 @@
-from batchglm.utils.random import NegativeBinomial, Normal, Beta
+from batchglm.utils.random import NegativeBinomial, Normal, Bernoulli, Beta
diff --git a/batchglm/models/glm_bern/__init__.py b/batchglm/models/glm_bern/__init__.py
@@ -0,0 +1,4 @@
+from .model import Model, Model_XArray
+from .external import InputData
+from .simulator import Simulator
+from .estimator import AbstractEstimator, EstimatorStoreXArray
diff --git a/batchglm/models/glm_bern/estimator.py b/batchglm/models/glm_bern/estimator.py
@@ -0,0 +1,30 @@
+import abc
+
+from .model import Model, Model_XArray
+from .external import _Estimator_GLM, _EstimatorStore_XArray_GLM, ESTIMATOR_PARAMS
+
+
+class AbstractEstimator(Model, _Estimator_GLM, metaclass=abc.ABCMeta):
+    r"""
+    Estimator base class for generalized linear models (GLMs) with
+    Bernoulli noise.
+    """
+
+    @classmethod
+    def param_shapes(cls) -> dict:
+        return ESTIMATOR_PARAMS
+
+
+class EstimatorStoreXArray(_EstimatorStore_XArray_GLM, AbstractEstimator, Model_XArray):
+
+    def __init__(self, estim: AbstractEstimator):
+        input_data = estim.input_data
+        # to_xarray invokes the getter of each property listed below and thereby
+        # forces evaluation of any that were not computed during training
+        # (presumably fisher_inv here; the hessian is not requested).
+        params = estim.to_xarray(
+            ["a_var", "b_var", "loss", "log_likelihood", "gradients", "fisher_inv"],
+            coords=input_data.data
+        )
+
+        Model_XArray.__init__(self, input_data, params)
diff --git a/batchglm/models/glm_bern/external.py b/batchglm/models/glm_bern/external.py
@@ -0,0 +1,11 @@
+from batchglm.models.base import SparseXArrayDataArray, SparseXArrayDataSet
+from batchglm.models.base_glm import _Estimator_GLM, _EstimatorStore_XArray_GLM, ESTIMATOR_PARAMS
+from batchglm.models.base_glm import InputData, INPUT_DATA_PARAMS
+from batchglm.models.base_glm import _Model_GLM, _Model_XArray_GLM, MODEL_PARAMS, _model_from_params
+from batchglm.models.base_glm import _Simulator_GLM
+from batchglm.models.base_glm import closedform_glm_mean, closedform_glm_scale
+
+import batchglm.data as data_utils
+import batchglm.utils.random as rand_utils
+from batchglm.utils.numeric import weighted_mean, weighted_variance
+from batchglm.utils.linalg import groupwise_solve_lm
diff --git a/batchglm/models/glm_bern/model.py b/batchglm/models/glm_bern/model.py
@@ -0,0 +1,83 @@
+import abc
+try:
+    import anndata
+except ImportError:
+    anndata = None
+import xarray as xr
+import numpy as np
+
+from .external import InputData
+from .external import _Model_GLM, _Model_XArray_GLM, MODEL_PARAMS, _model_from_params
+
+# Define distribution parameters:
+MODEL_PARAMS = MODEL_PARAMS.copy()
+MODEL_PARAMS.update({
+    "mu": ("observations", "features"),
+    "r": ("observations", "features"),
+})
+
+class Model(_Model_GLM, metaclass=abc.ABCMeta):
+    """
+    Generalized Linear Model (GLM) with Bernoulli noise.
+    """
+
+    @classmethod
+    def param_shapes(cls) -> dict:
+        return MODEL_PARAMS
+
+    def link_loc(self, data):
+        return np.log(data/(1-data))
+
+    def inverse_link_loc(self, data):
+        return 1/(1+np.exp(-data))
+
+    def link_scale(self, data):
+        return data
+
+    def inverse_link_scale(self, data):
+        return data
+
+    @property
+    def eta_loc(self) -> xr.DataArray:
+        # TODO: take this switch out once xr.dataset slicing yields dataarray with loc_names coordinate:
+        if isinstance(self.par_link_loc, xr.DataArray):
+            eta = self.design_loc.dot(self.par_link_loc, dims="design_loc_params")
+        else:
+            eta = np.matmul(self.design_loc.values, self.par_link_loc)
+
+        if self.size_factors is not None:
+            assert False, "size factors not allowed"
+        return eta
+
+    @property
+    def mu(self) -> xr.DataArray:
+        return self.location
+
+    @property
+    def r(self) -> xr.DataArray:
+        return self.scale
+
+
+def model_from_params(*args, **kwargs) -> Model:
+    (input_data, params) = _model_from_params(*args, **kwargs)
+    return Model_XArray(input_data, params)
+
+
+class Model_XArray(_Model_XArray_GLM, Model):
+    _input_data: InputData
+    params: xr.Dataset
+
+    def __init__(self, input_data: InputData, params: xr.Dataset):
+        super(_Model_XArray_GLM, self).__init__(input_data=input_data, params=params)
+        super(Model, self).__init__()
+
+    def __str__(self):
+        return "[%s.%s object at %s]: data=%s" % (
+            type(self).__module__,
+            type(self).__name__,
+            hex(id(self)),
+            self.params
+        )
+
+    def __repr__(self):
+        return self.__str__()
diff --git a/batchglm/models/glm_bern/simulator.py b/batchglm/models/glm_bern/simulator.py
@@ -0,0 +1,47 @@
+import numpy as np
+
+from .model import Model
+from .external import rand_utils, _Simulator_GLM
+
+
+class Simulator(_Simulator_GLM, Model):
+    """
+    Simulator for Generalized Linear Models (GLMs) with Bernoulli noise.
+    Uses the logit link function.
+    """
+
+    def __init__(
+            self,
+            num_observations=1000,
+            num_features=100
+    ):
+        Model.__init__(self)
+        _Simulator_GLM.__init__(
+            self,
+            num_observations=num_observations,
+            num_features=num_features
+        )
+
+    def generate_params(
+            self,
+            rand_fn_ave=lambda shape: np.random.uniform(0.3, 0.4, shape),
+            rand_fn=None,
+            rand_fn_loc=lambda shape: np.random.uniform(0.4, 0.6, shape),
+            rand_fn_scale=lambda shape: np.zeros(shape),
+        ):
+        self._generate_params(
+            self,
+            rand_fn_ave=rand_fn_ave,
+            rand_fn=rand_fn,
+            rand_fn_loc=rand_fn_loc,
+            rand_fn_scale=rand_fn_scale,
+        )
+
+    def generate_data(self):
+        """
+        Sample random data from the Bernoulli distribution parameterized by the model mean `mu`.
+        """
+        self.data["X"] = (
+            self.param_shapes()["X"],
+            rand_utils.Bernoulli(mean=self.mu).sample()
+        )
diff --git a/batchglm/models/glm_bern/utils.py b/batchglm/models/glm_bern/utils.py
@@ -0,0 +1,37 @@
+from typing import Union
+
+import numpy as np
+import xarray as xr
+
+from .external import closedform_glm_mean
+from .external import SparseXArrayDataArray
+
+
+def closedform_bern_glm_logitmu(
+        X: Union[xr.DataArray, SparseXArrayDataArray],
+        design_loc,
+        constraints_loc,
+        size_factors=None,
+        link_fn=lambda data: np.log(data/(1-data)),
+        inv_link_fn=lambda data: 1/(1+np.exp(-data))
+):
+    r"""
+    Calculates a closed-form solution for the `mu` parameters of Bernoulli GLMs.
+
+    :param X: The sample data
+    :param design_loc: design matrix for location
+    :param constraints_loc: tensor (all parameters x dependent parameters)
+        Tensor that encodes how the complete parameter set, which includes dependent
+        parameters, arises from the independent parameters: all = <constraints, indep>.
+        This form of constraints is used in vector generalized linear models (VGLMs).
+    :param size_factors: size factors for X
+    :return: tuple: (groupwise_means, mu, rmsd)
+    """
+    return closedform_glm_mean(
+        X=X,
+        dmat=design_loc,
+        constraints=constraints_loc,
+        size_factors=size_factors,
+        link_fn=link_fn,
+        inv_link_fn=inv_link_fn
+    )
diff --git a/batchglm/models/glm_beta/simulator.py b/batchglm/models/glm_beta/simulator.py
@@ -24,10 +24,10 @@ def __init__(
 
     def generate_params(
             self,
-            rand_fn_ave=lambda shape: np.random.uniform(0.2, 0.8, shape),
+            rand_fn_ave=lambda shape: np.random.uniform(0.2, 0.3, shape),
             rand_fn=None,
-            rand_fn_loc=lambda shape: np.random.uniform(0.05, 0.15, shape),
-            rand_fn_scale=lambda shape: np.random.uniform(1e5, 2*1e5, shape),
+            rand_fn_loc=lambda shape: np.random.uniform(0.5, 0.6, shape),
+            rand_fn_scale=lambda shape: np.random.uniform(1e2, 2e3, shape),
         ):
         self._generate_params(
             self,

diff --git a/batchglm/models/glm_beta/utils.py b/batchglm/models/glm_beta/utils.py
@@ -35,7 +35,6 @@ def closedform_beta_glm_logitmean(
         dmat=design_loc,
         constraints=constraints_loc,
         size_factors=size_factors,
-        weights=None,
         link_fn=link_fn,
         inv_link_fn=inv_link_fn
     )

diff --git a/batchglm/train/tf/base_glm_all/estimator.py b/batchglm/train/tf/base_glm_all/estimator.py
@@ -73,6 +73,8 @@ def __init__(
             from .external_norm import EstimatorGraph
         elif noise_model == "beta":
             from .external_beta import EstimatorGraph
+        elif noise_model == "bern":
+            from .external_bern import EstimatorGraph
         else:
             raise ValueError("noise model %s was not recognized" % noise_model)
         self.noise_model = noise_model
@@ -356,6 +358,8 @@ def finalize(self):
             from .external_norm import EstimatorStoreXArray
         elif self.noise_model == "beta":
             from .external_beta import EstimatorStoreXArray
+        elif self.noise_model == "bern":
+            from .external_bern import EstimatorStoreXArray
         else:
             raise ValueError("noise model not recognized")
 

diff --git a/batchglm/train/tf/base_glm_all/estimator_graph.py b/batchglm/train/tf/base_glm_all/estimator_graph.py
@@ -66,6 +66,8 @@ def __init__(
             from .external_norm import ReducibleTensors
         elif noise_model == "beta":
             from .external_beta import ReducibleTensors
+        elif noise_model == "bern":
+            from .external_bern import ReducibleTensors
         else:
             raise ValueError("noise model not recognized")
         self.noise_model = noise_model
@@ -252,6 +254,8 @@ def __init__(
             from .external_norm import ReducibleTensors
         elif noise_model == "beta":
             from .external_beta import ReducibleTensors
+        elif noise_model == "bern":
+            from .external_bern import ReducibleTensors
         else:
             raise ValueError("noise model not recognized")
         self.noise_model = noise_model
@@ -433,6 +437,8 @@ def __init__(
             from .external_norm import ModelVars
         elif noise_model == "beta":
             from .external_beta import ModelVars
+        elif noise_model == "bern":
+            from .external_bern import ModelVars
         else:
             raise ValueError("noise model not recognized")
         self.noise_model = noise_model

diff --git a/batchglm/train/tf/base_glm_all/external_bern.py b/batchglm/train/tf/base_glm_all/external_bern.py
@@ -0,0 +1,6 @@
+from batchglm.train.tf.glm_bern import EstimatorGraph
+from batchglm.train.tf.glm_bern import BasicModelGraph, ModelVars, ProcessModel
+from batchglm.train.tf.glm_bern import Hessians, FIM, Jacobians, ReducibleTensors
+
+from batchglm.models.glm_bern import AbstractEstimator, EstimatorStoreXArray, InputData, Model
+from batchglm.models.glm_bern.utils import closedform_bern_glm_logitmu
diff --git a/batchglm/train/tf/base_glm_all/reducible_tensors.py b/batchglm/train/tf/base_glm_all/reducible_tensors.py
@@ -37,6 +37,8 @@ def assemble_tensors(self, idx, data):
             from .external_norm import BasicModelGraph
         elif self.noise_model == "beta":
             from .external_beta import BasicModelGraph
+        elif self.noise_model == "bern":
+            from .external_bern import BasicModelGraph
         else:
             raise ValueError("noise model %s was not recognized" % self.noise_model)
 

diff --git a/batchglm/train/tf/glm_bern/__init__.py b/batchglm/train/tf/glm_bern/__init__.py
@@ -0,0 +1,7 @@
+from .estimator import Estimator
+from .estimator_graph import EstimatorGraph
+from .model import BasicModelGraph, ModelVars, ProcessModel
+from .hessians import Hessians
+from .fim import FIM
+from .jacobians import Jacobians
+from .reducible_tensors import ReducibleTensors
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from batchglm.models.glm_bern import InputData, Model, Simulator
		from batchglm.train.tf.glm_bern import Estimator
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		from batchglm.utils.random import NegativeBinomial, Normal, Beta
		from batchglm.utils.random import NegativeBinomial, Normal, Bernoulli, Beta