diff --git a/src/regmod/models/__init__.py b/src/regmod/models/__init__.py index 5e5ce5f..5d2680b 100644 --- a/src/regmod/models/__init__.py +++ b/src/regmod/models/__init__.py @@ -2,11 +2,28 @@ Models """ +from .binomial import BinomialModel, CanonicalBinomialModel, create_binomial_model +from .gaussian import CanonicalGaussianModel, GaussianModel, create_gaussian_model from .model import Model -from .gaussian import GaussianModel -from .poisson import PoissonModel -from .binomial import BinomialModel from .negativebinomial import NegativeBinomialModel from .pogit import PogitModel -from .weibull import WeibullModel +from .poisson import CanonicalPoissonModel, PoissonModel, create_poisson_model from .tobit import TobitModel +from .weibull import WeibullModel + +__all__ = [ + "BinomialModel", + "CanonicalBinomialModel", + "create_binomial_model", + "CanonicalGaussianModel", + "GaussianModel", + "create_gaussian_model", + "Model", + "NegativeBinomialModel", + "PogitModel", + "CanonicalPoissonModel", + "PoissonModel", + "create_poisson_model", + "TobitModel", + "WeibullModel", +] diff --git a/src/regmod/models/binomial.py b/src/regmod/models/binomial.py index ff8289c..446b0b9 100644 --- a/src/regmod/models/binomial.py +++ b/src/regmod/models/binomial.py @@ -5,12 +5,12 @@ import numpy as np from scipy.stats import binom +from regmod._typing import Callable, DataFrame, NDArray from regmod.data import Data from regmod.optimizer import msca_optimize -from regmod._typing import Callable, NDArray, DataFrame from .model import Model -from .utils import model_post_init +from .utils import get_params, model_post_init class BinomialModel(Model): @@ -26,10 +26,27 @@ def __init__(self, data: Data, **kwargs): def attach_df(self, df: DataFrame): super().attach_df(df) - self.mat[0], self.cmat, self.cvec = model_post_init( - self.mat[0], self.uvec, self.linear_umat, self.linear_uvec + self.mat[0], self.cmat, self.cvec, self.hmat = model_post_init( + self.mat[0], + self.uvec, + 
self.linear_umat, + self.linear_uvec, + self.gvec, + self.linear_gmat, + self.linear_gvec, ) + def hessian_from_gprior(self) -> NDArray: + """Hessian matrix from the Gaussian prior. + + Returns + ------- + Matrix + Hessian matrix. + + """ + return self.hmat + def objective(self, coefs: NDArray) -> float: """Objective function. Parameters @@ -141,6 +158,12 @@ def jacobian2(self, coefs: NDArray) -> NDArray: jacobian2 = jacobian.dot(jacobian.T) + hess_mat_gprior return jacobian2 + def get_pearson_residuals(self, coefs: NDArray) -> NDArray: + pred = self.params[0].get_param(coefs, self.data, mat=self.mat[0]) + pred_sd = np.sqrt(pred * (1 - pred) / self.data.weights) + + return (self.data.obs - pred) / pred_sd + def fit(self, optimizer: Callable = msca_optimize, **optimizer_options): """Fit function. @@ -173,3 +196,66 @@ def get_ui(self, params: list[NDArray], bounds: tuple[float, float]) -> NDArray: p = params[0] n = self.obs_sample_sizes return [binom.ppf(bounds[0], n=n, p=p), binom.ppf(bounds[1], n=n, p=p)] + + +class CanonicalBinomialModel(BinomialModel): + def __init__(self, data: Data, **kwargs): + super().__init__(data, **kwargs) + if self.params[0].inv_link.name != "expit": + raise ValueError( + "Canonical Binomial model requires inverse link to be expit." 
+ ) + + def objective(self, coefs: NDArray) -> float: + weights = self.data.weights * self.data.trim_weights + y = self.params[0].get_lin_param(coefs, self.data, mat=self.mat[0]) + + prior_obj = self.objective_from_gprior(coefs) + likli_obj = weights.dot(np.log(1 + np.exp(-y)) + (1 - self.data.obs) * y) + return prior_obj + likli_obj + + def gradient(self, coefs: NDArray) -> NDArray: + mat = self.mat[0] + weights = self.data.weights * self.data.trim_weights + z = np.exp(self.params[0].get_lin_param(coefs, self.data, mat=self.mat[0])) + + prior_grad = self.gradient_from_gprior(coefs) + likli_grad = mat.T.dot(weights * (z / (1 + z) - self.data.obs)) + return prior_grad + likli_grad + + def hessian(self, coefs: NDArray) -> NDArray: + mat = self.mat[0] + weights = self.data.weights * self.data.trim_weights + z = np.exp(self.params[0].get_lin_param(coefs, self.data, mat=self.mat[0])) + likli_hess_scale = weights * (z / ((1 + z) ** 2)) + + likli_hess_right = mat.scale_rows(likli_hess_scale) + likli_hess = mat.T.dot(likli_hess_right) + + return self.hessian_from_gprior() + likli_hess + + def jacobian2(self, coefs: NDArray) -> NDArray: + mat = self.mat[0] + weights = self.data.weights * self.data.trim_weights + z = np.exp(self.params[0].get_lin_param(coefs, self.data, mat=self.mat[0])) + likli_jac_scale = weights * (z / (1 + z) - self.data.obs) + + likli_jac = mat.T.scale_cols(likli_jac_scale) + likli_jac2 = likli_jac.dot(likli_jac.T) + return self.hessian_from_gprior() + likli_jac2 + + +def create_binomial_model(data: Data, **kwargs) -> BinomialModel: + params = get_params( + params=kwargs.get("params"), + param_specs=kwargs.get("param_specs"), + default_param_specs=BinomialModel.default_param_specs, + ) + + if params[0].inv_link.name == "expit": + return CanonicalBinomialModel(data, params=params) + return BinomialModel(data, params=params) + + +for key in ["param_names", "default_param_specs"]: + setattr(create_binomial_model, key, getattr(BinomialModel, key)) diff 
--git a/src/regmod/models/gaussian.py b/src/regmod/models/gaussian.py index 89318f2..1083e27 100644 --- a/src/regmod/models/gaussian.py +++ b/src/regmod/models/gaussian.py @@ -5,11 +5,12 @@ import numpy as np from scipy.stats import norm +from regmod._typing import Callable, DataFrame, Matrix, NDArray +from regmod.data import Data from regmod.optimizer import msca_optimize -from regmod._typing import Callable, NDArray, DataFrame from .model import Model -from .utils import model_post_init +from .utils import get_params, model_post_init class GaussianModel(Model): @@ -18,10 +19,19 @@ class GaussianModel(Model): def attach_df(self, df: DataFrame): super().attach_df(df) - self.mat[0], self.cmat, self.cvec = model_post_init( - self.mat[0], self.uvec, self.linear_umat, self.linear_uvec + self.mat[0], self.cmat, self.cvec, self.hmat = model_post_init( + self.mat[0], + self.uvec, + self.linear_umat, + self.linear_uvec, + self.gvec, + self.linear_gmat, + self.linear_gvec, ) + def hessian_from_gprior(self) -> Matrix: + return self.hmat + def objective(self, coefs: NDArray) -> float: """Objective function. Parameters @@ -123,6 +133,12 @@ def jacobian2(self, coefs: NDArray) -> NDArray: jacobian2 = jacobian.dot(jacobian.T) + hess_mat_gprior return jacobian2 + def get_pearson_residuals(self, coefs: NDArray) -> NDArray: + pred = self.params[0].get_param(coefs, self.data, mat=self.mat[0]) + pred_sd = 1.0 / np.sqrt(self.data.weights) + + return (self.data.obs - pred) / pred_sd + def fit(self, optimizer: Callable = msca_optimize, **optimizer_options): """Fit function. 
@@ -149,3 +165,66 @@ def get_ui(self, params: list[NDArray], bounds: tuple[float, float]) -> NDArray: norm.ppf(bounds[0], loc=mean, scale=sd), norm.ppf(bounds[1], loc=mean, scale=sd), ] + + +class CanonicalGaussianModel(GaussianModel): + def __init__(self, data: Data, **kwargs): + super().__init__(data, **kwargs) + if self.params[0].inv_link.name != "identity": + raise ValueError( + "Canonical Gaussian model requires inverse link to be identity." + ) + + def objective(self, coefs: NDArray) -> float: + weights = self.data.weights * self.data.trim_weights + y = self.params[0].get_lin_param(coefs, self.data, mat=self.mat[0]) + + prior_obj = self.objective_from_gprior(coefs) + likli_obj = 0.5 * weights.dot((y - self.data.obs) ** 2) + return prior_obj + likli_obj + + def gradient(self, coefs: NDArray) -> NDArray: + mat = self.mat[0] + weights = self.data.weights * self.data.trim_weights + y = self.params[0].get_lin_param(coefs, self.data, mat=self.mat[0]) + + prior_grad = self.gradient_from_gprior(coefs) + likli_grad = mat.T.dot(weights * (y - self.data.obs)) + return prior_grad + likli_grad + + def hessian(self, coefs: NDArray) -> Matrix: + mat = self.mat[0] + weights = self.data.weights * self.data.trim_weights + likli_hess_scale = weights + + prior_hess = self.hessian_from_gprior() + likli_hess_right = mat.scale_rows(likli_hess_scale) + likli_hess = mat.T.dot(likli_hess_right) + + return prior_hess + likli_hess + + def jacobian2(self, coefs: NDArray) -> NDArray: + mat = self.mat[0] + weights = self.data.weights * self.data.trim_weights + y = self.params[0].get_lin_param(coefs, self.data, mat=self.mat[0]) + likli_jac_scale = weights * (y - self.data.obs) + + likli_jac = mat.T.scale_cols(likli_jac_scale) + likli_jac2 = likli_jac.dot(likli_jac.T) + return self.hessian_from_gprior() + likli_jac2 + + +def create_gaussian_model(data: Data, **kwargs) -> GaussianModel: + params = get_params( + params=kwargs.get("params"), + param_specs=kwargs.get("param_specs"), + 
default_param_specs=GaussianModel.default_param_specs, + ) + + if params[0].inv_link.name == "identity": + return CanonicalGaussianModel(data, params=params) + return GaussianModel(data, params=params) + + +for key in ["param_names", "default_param_specs"]: + setattr(create_gaussian_model, key, getattr(GaussianModel, key)) diff --git a/src/regmod/models/model.py b/src/regmod/models/model.py index 43826c6..9a69442 100644 --- a/src/regmod/models/model.py +++ b/src/regmod/models/model.py @@ -3,15 +3,15 @@ """ import numpy as np - from scipy.linalg import block_diag from scipy.sparse import csc_matrix +from regmod._typing import Callable, DataFrame, Matrix, NDArray from regmod.data import Data +from regmod.models.utils import get_params from regmod.optimizer import scipy_optimize from regmod.parameter import Parameter from regmod.utils import sizes_to_slices -from regmod._typing import Callable, NDArray, DataFrame, Matrix class Model: @@ -132,23 +132,9 @@ def __init__( params: list[Parameter] | None = None, param_specs: dict[str, dict] | None = None, ): - if params is None and param_specs is None: - raise ValueError("Must provide `params` or `param_specs`") - - if params is not None: - param_dict = {param.name: param for param in params} - self.params = [param_dict[param_name] for param_name in self.param_names] - else: - self.params = [ - Parameter( - param_name, - **{ - **self.default_param_specs[param_name], - **param_specs[param_name], - }, - ) - for param_name in self.param_names - ] + params = get_params(params, param_specs, self.default_param_specs) + param_dict = {param.name: param for param in params} + self.params = [param_dict[param_name] for param_name in self.param_names] self.data = data if not self.data.is_empty(): @@ -430,6 +416,9 @@ def get_ui(self, params: list[NDArray], bounds: tuple[float, float]) -> NDArray: """ raise NotImplementedError() + def get_pearson_residuals(self, coefs: NDArray) -> NDArray: + raise NotImplementedError() + def 
detect_outliers(self, coefs: NDArray, bounds: tuple[float, float]) -> NDArray: """Detect outliers. diff --git a/src/regmod/models/poisson.py b/src/regmod/models/poisson.py index fc86565..511f31c 100644 --- a/src/regmod/models/poisson.py +++ b/src/regmod/models/poisson.py @@ -5,12 +5,12 @@ import numpy as np from scipy.stats import poisson +from regmod._typing import Callable, DataFrame, Matrix, NDArray from regmod.data import Data from regmod.optimizer import msca_optimize -from regmod._typing import Callable, NDArray, DataFrame from .model import Model -from .utils import model_post_init +from .utils import get_params, model_post_init class PoissonModel(Model): @@ -24,10 +24,19 @@ def __init__(self, data: Data, **kwargs): def attach_df(self, df: DataFrame): super().attach_df(df) - self.mat[0], self.cmat, self.cvec = model_post_init( - self.mat[0], self.uvec, self.linear_umat, self.linear_uvec + self.mat[0], self.cmat, self.cvec, self.hmat = model_post_init( + self.mat[0], + self.uvec, + self.linear_umat, + self.linear_uvec, + self.gvec, + self.linear_gmat, + self.linear_gvec, ) + def hessian_from_gprior(self) -> Matrix: + return self.hmat + def objective(self, coefs: NDArray) -> float: """Objective function. Parameters @@ -126,6 +135,12 @@ def jacobian2(self, coefs: NDArray) -> NDArray: jacobian2 = jacobian.dot(jacobian.T) + hess_mat_gprior return jacobian2 + def get_pearson_residuals(self, coefs: NDArray) -> NDArray: + pred = self.params[0].get_param(coefs, self.data, mat=self.mat[0]) + pred_sd = np.sqrt(pred / self.data.weights) + + return (self.data.obs - pred) / pred_sd + def fit(self, optimizer: Callable = msca_optimize, **optimizer_options): """Fit function. 
@@ -148,3 +163,66 @@ def d2nll(self, params: list[NDArray]) -> list[list[NDArray]]: def get_ui(self, params: list[NDArray], bounds: tuple[float, float]) -> NDArray: mean = params[0] return [poisson.ppf(bounds[0], mu=mean), poisson.ppf(bounds[1], mu=mean)] + + +class CanonicalPoissonModel(PoissonModel): + def __init__(self, data: Data, **kwargs): + super().__init__(data, **kwargs) + if self.params[0].inv_link.name != "exp": + raise ValueError("Canonical Poisson model requires inverse link to be exp.") + + def objective(self, coefs: NDArray) -> float: + weights = self.data.weights * self.data.trim_weights + y = self.params[0].get_lin_param(coefs, self.data, mat=self.mat[0]) + z = np.exp(y) + + prior_obj = self.objective_from_gprior(coefs) + likli_obj = weights.dot(z - self.data.obs * y) + return prior_obj + likli_obj + + def gradient(self, coefs: NDArray) -> NDArray: + mat = self.mat[0] + weights = self.data.weights * self.data.trim_weights + z = np.exp(self.params[0].get_lin_param(coefs, self.data, mat=self.mat[0])) + + prior_grad = self.gradient_from_gprior(coefs) + likli_grad = mat.T.dot(weights * (z - self.data.obs)) + return prior_grad + likli_grad + + def hessian(self, coefs: NDArray) -> Matrix: + mat = self.mat[0] + weights = self.data.weights * self.data.trim_weights + z = np.exp(self.params[0].get_lin_param(coefs, self.data, mat=self.mat[0])) + likli_hess_scale = weights * z + + prior_hess = self.hessian_from_gprior() + likli_hess_right = mat.scale_rows(likli_hess_scale) + likli_hess = mat.T.dot(likli_hess_right) + + return prior_hess + likli_hess + + def jacobian2(self, coefs: NDArray) -> NDArray: + mat = self.mat[0] + weights = self.data.weights * self.data.trim_weights + z = np.exp(self.params[0].get_lin_param(coefs, self.data, mat=self.mat[0])) + likli_jac_scale = weights * (z - self.data.obs) + + likli_jac = mat.T.scale_cols(likli_jac_scale) + likli_jac2 = likli_jac.dot(likli_jac.T) + return self.hessian_from_gprior() + likli_jac2 + + +def 
create_poisson_model(data: Data, **kwargs) -> PoissonModel: + params = get_params( + params=kwargs.get("params"), + param_specs=kwargs.get("param_specs"), + default_param_specs=PoissonModel.default_param_specs, + ) + + if params[0].inv_link.name == "exp": + return CanonicalPoissonModel(data, params=params) + return PoissonModel(data, params=params) + + +for key in ["param_names", "default_param_specs"]: + setattr(create_poisson_model, key, getattr(PoissonModel, key)) diff --git a/src/regmod/models/utils.py b/src/regmod/models/utils.py index f6fcb96..db41894 100644 --- a/src/regmod/models/utils.py +++ b/src/regmod/models/utils.py @@ -1,7 +1,9 @@ import numpy as np from msca.linalg.matrix import asmatrix from scipy.sparse import csc_matrix -from regmod._typing import NDArray, Matrix + +from regmod._typing import Matrix, NDArray +from regmod.parameter import Parameter def model_post_init( @@ -9,7 +11,10 @@ def model_post_init( uvec: NDArray, linear_umat: NDArray, linear_uvec: NDArray, -) -> tuple[Matrix, Matrix, NDArray]: + gvec: NDArray, + linear_gmat: NDArray, + linear_gvec: NDArray, +) -> tuple[Matrix, Matrix, NDArray, Matrix]: # design matrix issparse = mat.size == 0 or ((mat == 0).sum() / mat.size) > 0.95 if issparse: @@ -35,4 +40,33 @@ def model_post_init( cmat = csc_matrix(cmat).astype(np.float64) cmat = asmatrix(cmat) - return mat, cmat, cvec + gmat = np.vstack([np.identity(mat.shape[1]), linear_gmat]) + gvec = np.hstack([gvec, linear_gvec]) + + if issparse: + gmat = csc_matrix(gmat).astype(np.float64) + gmat = asmatrix(gmat) + + hmat = gmat.T.scale_cols(1.0 / gvec[1] ** 2).dot(gmat) + return mat, cmat, cvec, hmat + + +def get_params( + params: list[Parameter] | None = None, + param_specs: dict[str, dict] | None = None, + default_param_specs: dict[str, dict] | None = None, +) -> list[Parameter]: + if params is None and param_specs is None: + raise ValueError("Please provide `params` or `param_specs`") + + if params is not None: + return params + + 
default_param_specs = default_param_specs or {} + param_specs = { + key: {**default_param_specs.get(key, {}), **value} + for key, value in param_specs.items() + } + + params = [Parameter(key, **value) for key, value in param_specs.items()] + return params diff --git a/tests/test_binomialmodel.py b/tests/test_binomialmodel.py index 93cc586..ed27e9e 100644 --- a/tests/test_binomialmodel.py +++ b/tests/test_binomialmodel.py @@ -1,15 +1,19 @@ """ Test Binomial Model """ + import numpy as np import pandas as pd import pytest - from regmod.data import Data from regmod.function import fun_dict -from regmod.models import BinomialModel -from regmod.prior import (GaussianPrior, SplineGaussianPrior, - SplineUniformPrior, UniformPrior) +from regmod.models import create_binomial_model +from regmod.prior import ( + GaussianPrior, + SplineGaussianPrior, + SplineUniformPrior, + UniformPrior, +) from regmod.utils import SplineSpecs from regmod.variable import SplineVariable, Variable @@ -21,29 +25,30 @@ def data(): num_obs = 5 obs = np.random.rand(num_obs) sample_size = np.random.poisson(lam=5, size=num_obs) - df = pd.DataFrame({ - "obs": obs, - "sample_size": sample_size, - "cov0": np.random.randn(num_obs), - "cov1": np.random.randn(num_obs) - }) - return Data(col_obs="obs", - col_covs=["cov0", "cov1"], - col_weights="sample_size", - df=df) + df = pd.DataFrame( + { + "obs": obs, + "sample_size": sample_size, + "cov0": np.random.randn(num_obs), + "cov1": np.random.randn(num_obs), + } + ) + return Data( + col_obs="obs", col_covs=["cov0", "cov1"], col_weights="sample_size", df=df + ) @pytest.fixture def wrong_data(): num_obs = 5 - df = pd.DataFrame({ - "obs": np.random.rand(num_obs)*10, - "cov0": np.random.randn(num_obs), - "cov1": np.random.randn(num_obs) - }) - return Data(col_obs="obs", - col_covs=["cov0", "cov1"], - df=df) + df = pd.DataFrame( + { + "obs": np.random.rand(num_obs) * 10, + "cov0": np.random.randn(num_obs), + "cov1": np.random.randn(num_obs), + } + ) + return 
Data(col_obs="obs", col_covs=["cov0", "cov1"], df=df) @pytest.fixture @@ -58,9 +63,9 @@ def uprior(): @pytest.fixture def spline_specs(): - return SplineSpecs(knots=np.linspace(0.0, 1.0, 5), - degree=3, - knots_type="rel_domain") + return SplineSpecs( + knots=np.linspace(0.0, 1.0, 5), degree=3, knots_type="rel_domain" + ) @pytest.fixture @@ -75,20 +80,21 @@ def spline_uprior(): @pytest.fixture def var_cov0(gprior, uprior): - return Variable(name="cov0", - priors=[gprior, uprior]) + return Variable(name="cov0", priors=[gprior, uprior]) @pytest.fixture def var_cov1(spline_gprior, spline_uprior, spline_specs): - return SplineVariable(name="cov1", - spline_specs=spline_specs, - priors=[spline_gprior, spline_uprior]) + return SplineVariable( + name="cov1", spline_specs=spline_specs, priors=[spline_gprior, spline_uprior] + ) @pytest.fixture def model(data, var_cov0, var_cov1): - return BinomialModel(data, param_specs={"p": {"variables": [var_cov0, var_cov1]}}) + return create_binomial_model( + data, param_specs={"p": {"variables": [var_cov0, var_cov1]}} + ) def test_model_size(model, var_cov0, var_cov1): @@ -128,7 +134,7 @@ def test_model_gradient(model, inv_link): tr_grad = np.zeros(model.size) for i in range(model.size): coefs_c[i] += 1e-16j - tr_grad[i] = model.objective(coefs_c).imag/1e-16 + tr_grad[i] = model.objective(coefs_c).imag / 1e-16 coefs_c[i] -= 1e-16j assert np.allclose(my_grad, tr_grad) @@ -143,7 +149,7 @@ def test_model_hessian(model, inv_link): for i in range(model.size): for j in range(model.size): coefs_c[j] += 1e-16j - tr_hess[i][j] = model.gradient(coefs_c).imag[i]/1e-16 + tr_hess[i][j] = model.gradient(coefs_c).imag[i] / 1e-16 coefs_c[j] -= 1e-16j assert np.allclose(my_hess, tr_hess) @@ -151,11 +157,13 @@ def test_model_hessian(model, inv_link): def test_wrong_data(wrong_data, var_cov0, var_cov1): with pytest.raises(ValueError): - BinomialModel(wrong_data, param_specs={"p": {"variables": [var_cov0, var_cov1]}}) + create_binomial_model( + 
wrong_data, param_specs={"p": {"variables": [var_cov0, var_cov1]}} + ) def test_get_ui(model): - model.obs_sample_sizes = np.array([100]*5) + model.obs_sample_sizes = np.array([100] * 5) params = [np.full(5, 0.5)] bounds = [0.025, 0.975] ui = model.get_ui(params, bounds) @@ -167,18 +175,20 @@ def test_model_no_variables(): num_obs = 5 obs = np.random.rand(num_obs) sample_size = np.random.poisson(lam=5, size=num_obs) - df = pd.DataFrame({ - "obs": obs, - "sample_size": sample_size, - "offset": np.ones(num_obs), - }) + df = pd.DataFrame( + { + "obs": obs, + "sample_size": sample_size, + "offset": np.ones(num_obs), + } + ) data = Data( col_obs="obs", col_offset="offset", col_weights="sample_size", df=df, ) - model = BinomialModel(data, param_specs={"p": {"offset": "offset"}}) + model = create_binomial_model(data, param_specs={"p": {"offset": "offset"}}) coefs = np.array([]) grad = model.gradient(coefs) hessian = model.hessian(coefs) diff --git a/tests/test_gaussianmodel.py b/tests/test_gaussianmodel.py index 4e85392..476ab18 100644 --- a/tests/test_gaussianmodel.py +++ b/tests/test_gaussianmodel.py @@ -1,15 +1,19 @@ """ Test Gaussian Model """ + import numpy as np import pandas as pd import pytest - from regmod.data import Data from regmod.function import fun_dict -from regmod.models import GaussianModel -from regmod.prior import (GaussianPrior, SplineGaussianPrior, - SplineUniformPrior, UniformPrior) +from regmod.models import create_gaussian_model +from regmod.prior import ( + GaussianPrior, + SplineGaussianPrior, + SplineUniformPrior, + UniformPrior, +) from regmod.utils import SplineSpecs from regmod.variable import SplineVariable, Variable @@ -19,14 +23,14 @@ @pytest.fixture def data(): num_obs = 5 - df = pd.DataFrame({ - "obs": np.random.randn(num_obs), - "cov0": np.random.randn(num_obs), - "cov1": np.random.randn(num_obs) - }) - return Data(col_obs="obs", - col_covs=["cov0", "cov1"], - df=df) + df = pd.DataFrame( + { + "obs": np.random.randn(num_obs), + "cov0": 
np.random.randn(num_obs), + "cov1": np.random.randn(num_obs), + } + ) + return Data(col_obs="obs", col_covs=["cov0", "cov1"], df=df) @pytest.fixture @@ -41,9 +45,9 @@ def uprior(): @pytest.fixture def spline_specs(): - return SplineSpecs(knots=np.linspace(0.0, 1.0, 5), - degree=3, - knots_type="rel_domain") + return SplineSpecs( + knots=np.linspace(0.0, 1.0, 5), degree=3, knots_type="rel_domain" + ) @pytest.fixture @@ -58,20 +62,21 @@ def spline_uprior(): @pytest.fixture def var_cov0(gprior, uprior): - return Variable(name="cov0", - priors=[gprior, uprior]) + return Variable(name="cov0", priors=[gprior, uprior]) @pytest.fixture def var_cov1(spline_gprior, spline_uprior, spline_specs): - return SplineVariable(name="cov1", - spline_specs=spline_specs, - priors=[spline_gprior, spline_uprior]) + return SplineVariable( + name="cov1", spline_specs=spline_specs, priors=[spline_gprior, spline_uprior] + ) @pytest.fixture def model(data, var_cov0, var_cov1): - return GaussianModel(data, param_specs={"mu": {"variables": [var_cov0, var_cov1]}}) + return create_gaussian_model( + data, param_specs={"mu": {"variables": [var_cov0, var_cov1]}} + ) def test_model_result(model): @@ -117,7 +122,7 @@ def test_model_gradient(model, inv_link): tr_grad = np.zeros(model.size) for i in range(model.size): coefs_c[i] += 1e-16j - tr_grad[i] = model.objective(coefs_c).imag/1e-16 + tr_grad[i] = model.objective(coefs_c).imag / 1e-16 coefs_c[i] -= 1e-16j assert np.allclose(my_grad, tr_grad) @@ -132,7 +137,7 @@ def test_model_hessian(model, inv_link): for i in range(model.size): for j in range(model.size): coefs_c[j] += 1e-16j - tr_hess[i][j] = model.gradient(coefs_c).imag[i]/1e-16 + tr_hess[i][j] = model.gradient(coefs_c).imag[i] / 1e-16 coefs_c[j] -= 1e-16j assert np.allclose(my_hess, tr_hess) @@ -152,8 +157,8 @@ def test_model_jacobian2(model): mat = model.mat[0].to_numpy() param = model.get_params(beta)[0] - residual = (model.data.obs - param)*np.sqrt(model.data.weights) - jacobian = 
mat.T*residual + residual = (model.data.obs - param) * np.sqrt(model.data.weights) + jacobian = mat.T * residual true_jacobian2 = jacobian.dot(jacobian.T) + model.hessian_from_gprior() assert np.allclose(jacobian2, true_jacobian2) @@ -161,16 +166,18 @@ def test_model_jacobian2(model): def test_model_no_variables(): num_obs = 5 - df = pd.DataFrame({ - "obs": np.random.randn(num_obs), - "offset": np.ones(num_obs), - }) + df = pd.DataFrame( + { + "obs": np.random.randn(num_obs), + "offset": np.ones(num_obs), + } + ) data = Data( col_obs="obs", col_offset="offset", df=df, ) - model = GaussianModel(data, param_specs={"mu": {"offset": "offset"}}) + model = create_gaussian_model(data, param_specs={"mu": {"offset": "offset"}}) coefs = np.array([]) grad = model.gradient(coefs) hessian = model.hessian(coefs) diff --git a/tests/test_poissonmodel.py b/tests/test_poissonmodel.py index 42ac04b..97a4fae 100644 --- a/tests/test_poissonmodel.py +++ b/tests/test_poissonmodel.py @@ -1,15 +1,19 @@ """ Test Poisson Model """ + import numpy as np import pandas as pd import pytest - from regmod.data import Data from regmod.function import fun_dict -from regmod.models import PoissonModel -from regmod.prior import (GaussianPrior, SplineGaussianPrior, - SplineUniformPrior, UniformPrior) +from regmod.models import create_poisson_model +from regmod.prior import ( + GaussianPrior, + SplineGaussianPrior, + SplineUniformPrior, + UniformPrior, +) from regmod.utils import SplineSpecs from regmod.variable import SplineVariable, Variable @@ -19,27 +23,27 @@ @pytest.fixture def data(): num_obs = 5 - df = pd.DataFrame({ - "obs": np.random.rand(num_obs)*10, - "cov0": np.random.randn(num_obs), - "cov1": np.random.randn(num_obs) - }) - return Data(col_obs="obs", - col_covs=["cov0", "cov1"], - df=df) + df = pd.DataFrame( + { + "obs": np.random.rand(num_obs) * 10, + "cov0": np.random.randn(num_obs), + "cov1": np.random.randn(num_obs), + } + ) + return Data(col_obs="obs", col_covs=["cov0", "cov1"], df=df) 
@pytest.fixture def wrong_data(): num_obs = 5 - df = pd.DataFrame({ - "obs": np.random.randn(num_obs), - "cov0": np.random.randn(num_obs), - "cov1": np.random.randn(num_obs) - }) - return Data(col_obs="obs", - col_covs=["cov0", "cov1"], - df=df) + df = pd.DataFrame( + { + "obs": np.random.randn(num_obs), + "cov0": np.random.randn(num_obs), + "cov1": np.random.randn(num_obs), + } + ) + return Data(col_obs="obs", col_covs=["cov0", "cov1"], df=df) @pytest.fixture @@ -54,9 +58,9 @@ def uprior(): @pytest.fixture def spline_specs(): - return SplineSpecs(knots=np.linspace(0.0, 1.0, 5), - degree=3, - knots_type="rel_domain") + return SplineSpecs( + knots=np.linspace(0.0, 1.0, 5), degree=3, knots_type="rel_domain" + ) @pytest.fixture @@ -71,20 +75,21 @@ def spline_uprior(): @pytest.fixture def var_cov0(gprior, uprior): - return Variable(name="cov0", - priors=[gprior, uprior]) + return Variable(name="cov0", priors=[gprior, uprior]) @pytest.fixture def var_cov1(spline_gprior, spline_uprior, spline_specs): - return SplineVariable(name="cov1", - spline_specs=spline_specs, - priors=[spline_gprior, spline_uprior]) + return SplineVariable( + name="cov1", spline_specs=spline_specs, priors=[spline_gprior, spline_uprior] + ) @pytest.fixture def model(data, var_cov0, var_cov1): - return PoissonModel(data, param_specs={"lam": {"variables": [var_cov0, var_cov1]}}) + return create_poisson_model( + data, param_specs={"lam": {"variables": [var_cov0, var_cov1]}} + ) def test_model_size(model, var_cov0, var_cov1): @@ -124,7 +129,7 @@ def test_model_gradient(model, inv_link): tr_grad = np.zeros(model.size) for i in range(model.size): coefs_c[i] += 1e-16j - tr_grad[i] = model.objective(coefs_c).imag/1e-16 + tr_grad[i] = model.objective(coefs_c).imag / 1e-16 coefs_c[i] -= 1e-16j assert np.allclose(my_grad, tr_grad) @@ -139,7 +144,7 @@ def test_model_hessian(model, inv_link): for i in range(model.size): for j in range(model.size): coefs_c[j] += 1e-16j - tr_hess[i][j] = 
model.gradient(coefs_c).imag[i]/1e-16 + tr_hess[i][j] = model.gradient(coefs_c).imag[i] / 1e-16 coefs_c[j] -= 1e-16j assert np.allclose(my_hess, tr_hess) @@ -147,7 +152,9 @@ def test_model_hessian(model, inv_link): def test_wrong_data(wrong_data, var_cov0, var_cov1): with pytest.raises(ValueError): - PoissonModel(wrong_data, param_specs={"lam": {"variables": [var_cov0, var_cov1]}}) + create_poisson_model( + wrong_data, param_specs={"lam": {"variables": [var_cov0, var_cov1]}} + ) def test_get_ui(model): @@ -160,16 +167,18 @@ def test_get_ui(model): def test_model_no_variables(): num_obs = 5 - df = pd.DataFrame({ - "obs": np.random.rand(num_obs)*10, - "offset": np.ones(num_obs), - }) + df = pd.DataFrame( + { + "obs": np.random.rand(num_obs) * 10, + "offset": np.ones(num_obs), + } + ) data = Data( col_obs="obs", col_offset="offset", df=df, ) - model = PoissonModel(data, param_specs={"lam": {"offset": "offset"}}) + model = create_poisson_model(data, param_specs={"lam": {"offset": "offset"}}) coefs = np.array([]) grad = model.gradient(coefs) hessian = model.hessian(coefs)