Add canonical models and Pearson residuals (#77)
* add canonical link function to binomial distribution and Pearson residual function

* pre-compute hessian from gprior in the post_init stage

* add get_params function in the models.utils

* add canonical binomial model and create_binomial_model function

* add canonical gaussian model

* add canonical poisson model

* add __all__ to models.__init__

* add Pearson residual for three main models

* remove get_lin_param and directly use the one from param

* add class attributes to model constructor
zhengp0 authored Dec 26, 2024 · 1 parent d728c69 · commit 9f070cf
Showing 9 changed files with 455 additions and 146 deletions.
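The headline change is that each main distribution now gets a canonical-link subclass plus a create_*_model factory that dispatches to it. A hedged sketch of the intended call pattern follows; the Data construction is schematic (actual column names and parameter specs depend on the application):

from regmod.data import Data
from regmod.models import CanonicalBinomialModel, create_binomial_model

# schematic data spec: observed proportions, covariates, and sample sizes
data = Data(col_obs="obs", col_covs=["x1", "x2"], col_weights="sample_size")

# with the default expit inverse link this should return the canonical subclass
model = create_binomial_model(data)
assert isinstance(model, CanonicalBinomialModel)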
25 changes: 21 additions & 4 deletions src/regmod/models/__init__.py
@@ -2,11 +2,28 @@
Models
"""

from .binomial import BinomialModel, CanonicalBinomialModel, create_binomial_model
from .gaussian import CanonicalGaussianModel, GaussianModel, create_gaussian_model
from .model import Model
from .negativebinomial import NegativeBinomialModel
from .pogit import PogitModel
from .poisson import CanonicalPoissonModel, PoissonModel, create_poisson_model
from .tobit import TobitModel
from .weibull import WeibullModel

__all__ = [
    "BinomialModel",
    "CanonicalBinomialModel",
    "create_binomial_model",
    "CanonicalGaussianModel",
    "GaussianModel",
    "create_gaussian_model",
    "Model",
    "NegativeBinomialModel",
    "PogitModel",
    "CanonicalPoissonModel",
    "PoissonModel",
    "create_poisson_model",
    "TobitModel",
    "WeibullModel",
]
94 changes: 90 additions & 4 deletions src/regmod/models/binomial.py
@@ -5,12 +5,12 @@
import numpy as np
from scipy.stats import binom

from regmod._typing import Callable, DataFrame, NDArray
from regmod.data import Data
from regmod.optimizer import msca_optimize

from .model import Model
from .utils import get_params, model_post_init


class BinomialModel(Model):
@@ -26,10 +26,27 @@ def __init__(self, data: Data, **kwargs):

    def attach_df(self, df: DataFrame):
        super().attach_df(df)
        self.mat[0], self.cmat, self.cvec, self.hmat = model_post_init(
            self.mat[0],
            self.uvec,
            self.linear_umat,
            self.linear_uvec,
            self.gvec,
            self.linear_gmat,
            self.linear_gvec,
        )

    def hessian_from_gprior(self) -> NDArray:
        """Hessian matrix from the Gaussian prior.

        Returns
        -------
        NDArray
            Hessian matrix.
        """
        return self.hmat

    def objective(self, coefs: NDArray) -> float:
        """Objective function.

        Parameters
@@ -141,6 +158,12 @@ def jacobian2(self, coefs: NDArray) -> NDArray:
        jacobian2 = jacobian.dot(jacobian.T) + hess_mat_gprior
        return jacobian2

    def get_pearson_residuals(self, coefs: NDArray) -> NDArray:
        pred = self.params[0].get_param(coefs, self.data, mat=self.mat[0])
        pred_sd = np.sqrt(pred * (1 - pred) / self.data.weights)

        return (self.data.obs - pred) / pred_sd
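For intuition, the binomial Pearson residual scales the raw residual by the standard deviation of the predicted proportion, sqrt(p * (1 - p) / n). A minimal standalone sketch, with hypothetical arrays standing in for the model's data:

import numpy as np

obs = np.array([0.20, 0.55, 0.90])      # observed proportions
pred = np.array([0.30, 0.50, 0.80])     # predicted proportions
weights = np.array([10.0, 25.0, 50.0])  # effective sample sizes

pearson = (obs - pred) / np.sqrt(pred * (1 - pred) / weights)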

    def fit(self, optimizer: Callable = msca_optimize, **optimizer_options):
        """Fit function.
@@ -173,3 +196,66 @@ def get_ui(self, params: list[NDArray], bounds: tuple[float, float]) -> NDArray:
        p = params[0]
        n = self.obs_sample_sizes
        return [binom.ppf(bounds[0], n=n, p=p), binom.ppf(bounds[1], n=n, p=p)]


class CanonicalBinomialModel(BinomialModel):
    def __init__(self, data: Data, **kwargs):
        super().__init__(data, **kwargs)
        if self.params[0].inv_link.name != "expit":
            raise ValueError(
                "Canonical Binomial model requires inverse link to be expit."
            )

    def objective(self, coefs: NDArray) -> float:
        weights = self.data.weights * self.data.trim_weights
        y = self.params[0].get_lin_param(coefs, self.data, mat=self.mat[0])

        prior_obj = self.objective_from_gprior(coefs)
        likli_obj = weights.dot(np.log(1 + np.exp(-y)) + (1 - self.data.obs) * y)
        return prior_obj + likli_obj

    def gradient(self, coefs: NDArray) -> NDArray:
        mat = self.mat[0]
        weights = self.data.weights * self.data.trim_weights
        z = np.exp(self.params[0].get_lin_param(coefs, self.data, mat=self.mat[0]))

        prior_grad = self.gradient_from_gprior(coefs)
        likli_grad = mat.T.dot(weights * (z / (1 + z) - self.data.obs))
        return prior_grad + likli_grad

    def hessian(self, coefs: NDArray) -> NDArray:
        mat = self.mat[0]
        weights = self.data.weights * self.data.trim_weights
        z = np.exp(self.params[0].get_lin_param(coefs, self.data, mat=self.mat[0]))
        likli_hess_scale = weights * (z / ((1 + z) ** 2))

        likli_hess_right = mat.scale_rows(likli_hess_scale)
        likli_hess = mat.T.dot(likli_hess_right)

        return self.hessian_from_gprior() + likli_hess

    def jacobian2(self, coefs: NDArray) -> NDArray:
        mat = self.mat[0]
        weights = self.data.weights * self.data.trim_weights
        z = np.exp(self.params[0].get_lin_param(coefs, self.data, mat=self.mat[0]))
        likli_jac_scale = weights * (z / (1 + z) - self.data.obs)

        likli_jac = mat.T.scale_cols(likli_jac_scale)
        likli_jac2 = likli_jac.dot(likli_jac.T)
        return self.hessian_from_gprior() + likli_jac2
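The canonical objective above is the Bernoulli negative log-likelihood rewritten in terms of the linear predictor y: -(obs * y - log(1 + exp(y))) equals log(1 + exp(-y)) + (1 - obs) * y. A quick standalone check of that identity (not part of the commit):

import numpy as np

eta = np.linspace(-5.0, 5.0, 21)  # linear predictor values
obs = 0.3                         # an observed proportion

nll_generic = -(obs * eta - np.log(1 + np.exp(eta)))
nll_canonical = np.log(1 + np.exp(-eta)) + (1 - obs) * eta
assert np.allclose(nll_generic, nll_canonical)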


def create_binomial_model(data: Data, **kwargs) -> BinomialModel:
    params = get_params(
        params=kwargs.get("params"),
        param_specs=kwargs.get("param_specs"),
        default_param_specs=BinomialModel.default_param_specs,
    )

    if params[0].inv_link.name == "expit":
        return CanonicalBinomialModel(data, params=params)
    return BinomialModel(data, params=params)


for key in ["param_names", "default_param_specs"]:
    setattr(create_binomial_model, key, getattr(BinomialModel, key))
87 changes: 83 additions & 4 deletions src/regmod/models/gaussian.py
@@ -5,11 +5,12 @@
import numpy as np
from scipy.stats import norm

from regmod._typing import Callable, DataFrame, Matrix, NDArray
from regmod.data import Data
from regmod.optimizer import msca_optimize

from .model import Model
from .utils import get_params, model_post_init


class GaussianModel(Model):
@@ -18,10 +19,19 @@ class GaussianModel(Model):

    def attach_df(self, df: DataFrame):
        super().attach_df(df)
        self.mat[0], self.cmat, self.cvec, self.hmat = model_post_init(
            self.mat[0],
            self.uvec,
            self.linear_umat,
            self.linear_uvec,
            self.gvec,
            self.linear_gmat,
            self.linear_gvec,
        )

    def hessian_from_gprior(self) -> Matrix:
        return self.hmat

    def objective(self, coefs: NDArray) -> float:
        """Objective function.

        Parameters
@@ -123,6 +133,12 @@ def jacobian2(self, coefs: NDArray) -> NDArray:
        jacobian2 = jacobian.dot(jacobian.T) + hess_mat_gprior
        return jacobian2

    def get_pearson_residuals(self, coefs: NDArray) -> NDArray:
        pred = self.params[0].get_param(coefs, self.data, mat=self.mat[0])
        pred_sd = 1.0 / np.sqrt(self.data.weights)

        return (self.data.obs - pred) / pred_sd
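Because the Gaussian model treats weights as inverse variances, the predicted standard deviation does not depend on the prediction itself, and the Pearson residual reduces to a weighted raw residual. Equivalently, with obs, pred, and weights as hypothetical arrays:

pearson = (obs - pred) * np.sqrt(weights)  # same as dividing by 1 / sqrt(weights)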

    def fit(self, optimizer: Callable = msca_optimize, **optimizer_options):
        """Fit function.
@@ -149,3 +165,66 @@ def get_ui(self, params: list[NDArray], bounds: tuple[float, float]) -> NDArray:
            norm.ppf(bounds[0], loc=mean, scale=sd),
            norm.ppf(bounds[1], loc=mean, scale=sd),
        ]


class CanonicalGaussianModel(GaussianModel):
    def __init__(self, data: Data, **kwargs):
        super().__init__(data, **kwargs)
        if self.params[0].inv_link.name != "identity":
            raise ValueError(
                "Canonical Gaussian model requires inverse link to be identity."
            )

    def objective(self, coefs: NDArray) -> float:
        weights = self.data.weights * self.data.trim_weights
        y = self.params[0].get_lin_param(coefs, self.data, mat=self.mat[0])

        prior_obj = self.objective_from_gprior(coefs)
        likli_obj = 0.5 * weights.dot((y - self.data.obs) ** 2)
        return prior_obj + likli_obj

    def gradient(self, coefs: NDArray) -> NDArray:
        mat = self.mat[0]
        weights = self.data.weights * self.data.trim_weights
        y = self.params[0].get_lin_param(coefs, self.data, mat=self.mat[0])

        prior_grad = self.gradient_from_gprior(coefs)
        likli_grad = mat.T.dot(weights * (y - self.data.obs))
        return prior_grad + likli_grad

    def hessian(self, coefs: NDArray) -> Matrix:
        mat = self.mat[0]
        weights = self.data.weights * self.data.trim_weights
        likli_hess_scale = weights

        prior_hess = self.hessian_from_gprior()
        likli_hess_right = mat.scale_rows(likli_hess_scale)
        likli_hess = mat.T.dot(likli_hess_right)

        return prior_hess + likli_hess

    def jacobian2(self, coefs: NDArray) -> NDArray:
        mat = self.mat[0]
        weights = self.data.weights * self.data.trim_weights
        y = self.params[0].get_lin_param(coefs, self.data, mat=self.mat[0])
        likli_jac_scale = weights * (y - self.data.obs)

        likli_jac = mat.T.scale_cols(likli_jac_scale)
        likli_jac2 = likli_jac.dot(likli_jac.T)
        return self.hessian_from_gprior() + likli_jac2
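A generic way to sanity-check closed-form gradients like those above is a central finite-difference comparison. A minimal sketch, assuming objective and gradient are callables such as model.objective and model.gradient:

import numpy as np

def check_gradient(objective, gradient, coefs, eps=1e-6):
    # compare the analytic gradient against central finite differences
    approx = np.empty_like(coefs)
    for i in range(coefs.size):
        step = np.zeros_like(coefs)
        step[i] = eps
        approx[i] = (objective(coefs + step) - objective(coefs - step)) / (2 * eps)
    return np.allclose(gradient(coefs), approx, rtol=1e-4, atol=1e-8)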


def create_gaussian_model(data: Data, **kwargs) -> GaussianModel:
    params = get_params(
        params=kwargs.get("params"),
        param_specs=kwargs.get("param_specs"),
        default_param_specs=GaussianModel.default_param_specs,
    )

    if params[0].inv_link.name == "identity":
        return CanonicalGaussianModel(data, params=params)
    return GaussianModel(data, params=params)


for key in ["param_names", "default_param_specs"]:
    setattr(create_gaussian_model, key, getattr(GaussianModel, key))
27 changes: 8 additions & 19 deletions src/regmod/models/model.py
@@ -3,15 +3,15 @@
"""

import numpy as np
from scipy.linalg import block_diag
from scipy.sparse import csc_matrix

from regmod._typing import Callable, DataFrame, Matrix, NDArray
from regmod.data import Data
from regmod.models.utils import get_params
from regmod.optimizer import scipy_optimize
from regmod.parameter import Parameter
from regmod.utils import sizes_to_slices


class Model:
@@ -132,23 +132,9 @@ def __init__(
        params: list[Parameter] | None = None,
        param_specs: dict[str, dict] | None = None,
    ):
        params = get_params(params, param_specs, self.default_param_specs)
        param_dict = {param.name: param for param in params}
        self.params = [param_dict[param_name] for param_name in self.param_names]
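The inline construction that previously lived here (raise if neither params nor param_specs is given; otherwise build Parameter objects by merging default_param_specs with the user's param_specs) now sits in models.utils.get_params. A sketch reconstructed from the removed code — the actual helper in regmod.models.utils may differ in detail:

from regmod.parameter import Parameter

def get_params(params=None, param_specs=None, default_param_specs=None):
    # reconstructed from the logic removed above; illustrative only
    if params is None and param_specs is None:
        raise ValueError("Must provide `params` or `param_specs`")
    if params is not None:
        return params
    return [
        Parameter(name, **{**default_param_specs[name], **param_specs[name]})
        for name in default_param_specs
    ]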

        self.data = data
        if not self.data.is_empty():
@@ -430,6 +416,9 @@ def get_ui(self, params: list[NDArray], bounds: tuple[float, float]) -> NDArray:
"""
raise NotImplementedError()

    def get_pearson_residuals(self, coefs: NDArray) -> NDArray:
        raise NotImplementedError()

    def detect_outliers(self, coefs: NDArray, bounds: tuple[float, float]) -> NDArray:
        """Detect outliers.