Skip to content

Adding Bernoulli Distribution #74

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 17 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions batchglm/api/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from . import glm_nb
from . import glm_norm
from . import glm_beta
from . import glm_bern
2 changes: 2 additions & 0 deletions batchglm/api/models/glm_bern.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from batchglm.models.glm_bern import InputData, Model, Simulator
from batchglm.train.tf.glm_bern import Estimator
2 changes: 1 addition & 1 deletion batchglm/api/utils/random.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from batchglm.utils.random import NegativeBinomial, Normal, Beta
from batchglm.utils.random import NegativeBinomial, Normal, Bernoulli, Beta
4 changes: 4 additions & 0 deletions batchglm/models/glm_bern/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .model import Model, Model_XArray
from .external import InputData
from .simulator import Simulator
from .estimator import AbstractEstimator, EstimatorStoreXArray
30 changes: 30 additions & 0 deletions batchglm/models/glm_bern/estimator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import abc

from .model import Model, Model_XArray
from .external import _Estimator_GLM, _EstimatorStore_XArray_GLM, ESTIMATOR_PARAMS


class AbstractEstimator(Model, _Estimator_GLM, metaclass=abc.ABCMeta):
r"""
Estimator base class for generalized linear models (GLMs) with
bernoulli noise.
"""

@classmethod
def param_shapes(cls) -> dict:
return ESTIMATOR_PARAMS


class EstimatorStoreXArray(_EstimatorStore_XArray_GLM, AbstractEstimator, Model_XArray):

def __init__(self, estim: AbstractEstimator):
input_data = estim.input_data
# to_xarray triggers the get function of these properties and thereby
# causes evaluation of the properties that have not been computed during
# training, such as the hessian.
params = estim.to_xarray(
["a_var", "b_var", "loss", "log_likelihood", "gradients", "fisher_inv"],
coords=input_data.data
)

Model_XArray.__init__(self, input_data, params)
11 changes: 11 additions & 0 deletions batchglm/models/glm_bern/external.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from batchglm.models.base import SparseXArrayDataArray, SparseXArrayDataSet
from batchglm.models.base_glm import _Estimator_GLM, _EstimatorStore_XArray_GLM, ESTIMATOR_PARAMS
from batchglm.models.base_glm import InputData, INPUT_DATA_PARAMS
from batchglm.models.base_glm import _Model_GLM, _Model_XArray_GLM, MODEL_PARAMS, _model_from_params
from batchglm.models.base_glm import _Simulator_GLM
from batchglm.models.base_glm import closedform_glm_mean, closedform_glm_scale

import batchglm.data as data_utils
import batchglm.utils.random as rand_utils
from batchglm.utils.numeric import weighted_mean, weighted_variance
from batchglm.utils.linalg import groupwise_solve_lm
83 changes: 83 additions & 0 deletions batchglm/models/glm_bern/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import abc
try:
import anndata
except ImportError:
anndata = None
import xarray as xr
import numpy as np

from .external import InputData
from .external import _Model_GLM, _Model_XArray_GLM, MODEL_PARAMS, _model_from_params

# Define distribution parameters:
MODEL_PARAMS = MODEL_PARAMS.copy()
MODEL_PARAMS.update({
"mu": ("observations", "features"),
"r": ("observations", "features"),
})

class Model(_Model_GLM, metaclass=abc.ABCMeta):
"""
Generalized Linear Model (GLM) with bernoulli noise.
"""

@classmethod
def param_shapes(cls) -> dict:
return MODEL_PARAMS

def link_loc(self, data):
return np.log(data/(1-data))

def inverse_link_loc(self, data):
return 1/(1+np.exp(-data))

def link_scale(self, data):
return data

def inverse_link_scale(self, data):
return data

@property
def eta_loc(self) -> xr.DataArray:
# TODO: take this switch out once xr.dataset slicing yields dataarray with loc_names coordinate:
if isinstance(self.par_link_loc, xr.DataArray):
eta = self.design_loc.dot(self.par_link_loc, dims="design_loc_params")
else:
eta = np.matmul(self.design_loc.values, self.par_link_loc)

if self.size_factors is not None:
assert False, "size factors not allowed"
return eta

@property
def mu(self) -> xr.DataArray:
return self.location

@property
def r(self) -> xr.DataArray:
return self.scale


def model_from_params(*args, **kwargs) -> Model:
(input_data, params) = _model_from_params(*args, **kwargs)
return Model_XArray(input_data, params)


class Model_XArray(_Model_XArray_GLM, Model):
_input_data: InputData
params: xr.Dataset

def __init__(self, input_data: InputData, params: xr.Dataset):
super(_Model_XArray_GLM, self).__init__(input_data=input_data, params=params)
super(Model, self).__init__()

def __str__(self):
return "[%s.%s object at %s]: data=%s" % (
type(self).__module__,
type(self).__name__,
hex(id(self)),
self.params
)

def __repr__(self):
return self.__str__()
47 changes: 47 additions & 0 deletions batchglm/models/glm_bern/simulator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import numpy as np

from .model import Model
from .external import rand_utils, _Simulator_GLM


class Simulator(_Simulator_GLM, Model):
"""
Simulator for Generalized Linear Models (GLMs) with bernoulli noise.
Uses logit linker function.
"""

def __init__(
self,
num_observations=1000,
num_features=100
):
Model.__init__(self)
_Simulator_GLM.__init__(
self,
num_observations=num_observations,
num_features=num_features
)

def generate_params(
self,
rand_fn_ave=lambda shape: np.random.uniform(0.3, 0.4, shape),
rand_fn=None,
rand_fn_loc=lambda shape: np.random.uniform(0.4, 0.6, shape),
rand_fn_scale=lambda shape: np.zeros(shape),
):
self._generate_params(
self,
rand_fn_ave=rand_fn_ave,
rand_fn=rand_fn,
rand_fn_loc=rand_fn_loc,
rand_fn_scale=rand_fn_scale,
)

def generate_data(self):
"""
Sample random data based on bernoulli distribution and parameters.
"""
self.data["X"] = (
self.param_shapes()["X"],
rand_utils.Bernoulli(mean=self.mu).sample()
)
37 changes: 37 additions & 0 deletions batchglm/models/glm_bern/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from typing import Union

import numpy as np
import xarray as xr

from .external import closedform_glm_mean
from .external import SparseXArrayDataArray


def closedform_bern_glm_logitmu(
X: Union[xr.DataArray, SparseXArrayDataArray],
design_loc,
constraints_loc,
size_factors=None,
link_fn=lambda data: np.log(data/(1-data)),
inv_link_fn=lambda data: 1/(1+np.exp(-data))
):
r"""
Calculates a closed-form solution for the `mu` parameters of bernoulli GLMs.

:param X: The sample data
:param design_loc: design matrix for location
:param constraints_loc: tensor (all parameters x dependent parameters)
Tensor that encodes how complete parameter set which includes dependent
parameters arises from indepedent parameters: all = <constraints, indep>.
This form of constraints is used in vector generalized linear models (VGLMs).
:param size_factors: size factors for X
:return: tuple: (groupwise_means, mu, rmsd)
"""
return closedform_glm_mean(
X=X,
dmat=design_loc,
constraints=constraints_loc,
size_factors=size_factors,
link_fn=link_fn,
inv_link_fn=inv_link_fn
)
6 changes: 3 additions & 3 deletions batchglm/models/glm_beta/simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ def __init__(

def generate_params(
self,
rand_fn_ave=lambda shape: np.random.uniform(0.2, 0.8, shape),
rand_fn_ave=lambda shape: np.random.uniform(0.2, 0.3, shape),
rand_fn=None,
rand_fn_loc=lambda shape: np.random.uniform(0.05, 0.15, shape),
rand_fn_scale=lambda shape: np.random.uniform(1e5, 2*1e5, shape),
rand_fn_loc=lambda shape: np.random.uniform(0.5, 0.6, shape),
rand_fn_scale=lambda shape: np.random.uniform(1e2, 2e3, shape),
):
self._generate_params(
self,
Expand Down
1 change: 0 additions & 1 deletion batchglm/models/glm_beta/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ def closedform_beta_glm_logitmean(
dmat=design_loc,
constraints=constraints_loc,
size_factors=size_factors,
weights=None,
link_fn=link_fn,
inv_link_fn=inv_link_fn
)
Expand Down
4 changes: 4 additions & 0 deletions batchglm/train/tf/base_glm_all/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ def __init__(
from .external_norm import EstimatorGraph
elif noise_model == "beta":
from .external_beta import EstimatorGraph
elif noise_model == "bern":
from .external_bern import EstimatorGraph
else:
raise ValueError("noise model %s was not recognized" % noise_model)
self.noise_model = noise_model
Expand Down Expand Up @@ -356,6 +358,8 @@ def finalize(self):
from .external_norm import EstimatorStoreXArray
elif self.noise_model == "beta":
from .external_beta import EstimatorStoreXArray
elif self.noise_model == "bern":
from .external_bern import EstimatorStoreXArray
else:
raise ValueError("noise model not recognized")

Expand Down
6 changes: 6 additions & 0 deletions batchglm/train/tf/base_glm_all/estimator_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ def __init__(
from .external_norm import ReducibleTensors
elif noise_model == "beta":
from .external_beta import ReducibleTensors
elif noise_model == "bern":
from .external_bern import ReducibleTensors
else:
raise ValueError("noise model not recognized")
self.noise_model = noise_model
Expand Down Expand Up @@ -252,6 +254,8 @@ def __init__(
from .external_norm import ReducibleTensors
elif noise_model == "beta":
from .external_beta import ReducibleTensors
elif noise_model == "bern":
from .external_bern import ReducibleTensors
else:
raise ValueError("noise model not recognized")
self.noise_model = noise_model
Expand Down Expand Up @@ -433,6 +437,8 @@ def __init__(
from .external_norm import ModelVars
elif noise_model == "beta":
from .external_beta import ModelVars
elif noise_model == "bern":
from .external_bern import ModelVars
else:
raise ValueError("noise model not recognized")
self.noise_model = noise_model
Expand Down
6 changes: 6 additions & 0 deletions batchglm/train/tf/base_glm_all/external_bern.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from batchglm.train.tf.glm_bern import EstimatorGraph
from batchglm.train.tf.glm_bern import BasicModelGraph, ModelVars, ProcessModel
from batchglm.train.tf.glm_bern import Hessians, FIM, Jacobians, ReducibleTensors

from batchglm.models.glm_bern import AbstractEstimator, EstimatorStoreXArray, InputData, Model
from batchglm.models.glm_bern.utils import closedform_bern_glm_logitmu
2 changes: 2 additions & 0 deletions batchglm/train/tf/base_glm_all/reducible_tensors.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ def assemble_tensors(self, idx, data):
from .external_norm import BasicModelGraph
elif self.noise_model == "beta":
from .external_beta import BasicModelGraph
elif self.noise_model == "bern":
from .external_bern import BasicModelGraph
else:
raise ValueError("noise model %s was not recognized" % self.noise_model)

Expand Down
7 changes: 7 additions & 0 deletions batchglm/train/tf/glm_bern/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from .estimator import Estimator
from .estimator_graph import EstimatorGraph
from .model import BasicModelGraph, ModelVars, ProcessModel
from .hessians import Hessians
from .fim import FIM
from .jacobians import Jacobians
from .reducible_tensors import ReducibleTensors
Loading