Skip to content

Commit

Permalink
Fix: Incorrect use of partial in `TweedieDistribution._rowwise_gradient_hessian` (#889)

Browse files Browse the repository at this point in the history

* fix

* Add changelog

* fix

* Add tests demonstrating the issue

* Use inv_gaussian_log_* functions when appropriate

* Fix inverse gaussian derivatives

* Update changelog

* Update CHANGELOG.rst

Co-authored-by: Luca Bittarello <[email protected]>

* Clarify why we are using the FIM

* Add test against glmnet

---------

Co-authored-by: Martin Stancsics <[email protected]>
Co-authored-by: Luca Bittarello <[email protected]>
  • Loading branch information
3 people authored Jan 9, 2025
1 parent c3f1bae commit 3e483b9
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 7 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ Changelog
UNRELEASED
----------

**Bug fixes:**
- Fixed a bug where :meth:`~glum.TweedieDistribution._rowwise_gradient_hessian` and :meth:`~glum.TweedieDistribution._eta_mu_deviance` would call functions with wrong arguments in the ``p = 3`` case.
- Fixed :class:`glum.InverseGaussianDistribution` not using the optimized gradient, Hessian and deviance implementations, as well as those derivatives having the wrong sign.

**Other changes:**

- Build and test with Python 3.13 in CI.
Expand Down
15 changes: 10 additions & 5 deletions src/glum/_distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,8 +671,8 @@ def _rowwise_gradient_hessian(
f = gamma_log_rowwise_gradient_hessian
elif 1 < self.power < 2 and isinstance(link, LogLink):
f = partial(tweedie_log_rowwise_gradient_hessian, p=self.power)
elif self.power == 3:
f = partial(inv_gaussian_log_rowwise_gradient_hessian, p=self.power)
elif self.power == 3 and isinstance(link, LogLink):
f = inv_gaussian_log_rowwise_gradient_hessian

if f is not None:
return f(y, sample_weight, eta, mu, gradient_rows, hessian_rows)
Expand Down Expand Up @@ -703,7 +703,7 @@ def _eta_mu_deviance(
elif 1 < self.power < 2 and isinstance(link, LogLink):
f = partial(tweedie_log_eta_mu_deviance, p=self.power)
elif self.power == 3 and isinstance(link, LogLink):
f = partial(inv_gaussian_log_eta_mu_deviance, p=self.power)
f = inv_gaussian_log_eta_mu_deviance

if f is not None:
return f(cur_eta, X_dot_d, y, sample_weight, eta_out, mu_out, factor)
Expand Down Expand Up @@ -1153,6 +1153,11 @@ def unit_deviance(self, y, mu): # noqa D
def _rowwise_gradient_hessian(
self, link, y, sample_weight, eta, mu, gradient_rows, hessian_rows
):
if isinstance(link, LogLink):
return inv_gaussian_log_rowwise_gradient_hessian(
y, sample_weight, eta, mu, gradient_rows, hessian_rows
)

return super()._rowwise_gradient_hessian(
link, y, sample_weight, eta, mu, gradient_rows, hessian_rows
)
Expand All @@ -1169,8 +1174,8 @@ def _eta_mu_deviance(
mu_out,
):
if isinstance(link, LogLink):
return tweedie_log_eta_mu_deviance(
cur_eta, X_dot_d, y, sample_weight, eta_out, mu_out, factor, p=3.0
return inv_gaussian_log_eta_mu_deviance(
cur_eta, X_dot_d, y, sample_weight, eta_out, mu_out, factor
)

return super()._eta_mu_deviance(
Expand Down
6 changes: 4 additions & 2 deletions src/glum/_functions.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -276,8 +276,10 @@ def inv_gaussian_log_rowwise_gradient_hessian(
inv_mu = 1 / mu[i]
inv_mu2 = inv_mu ** 2

gradient_rows_out[i] = 2 * weights[i] * (inv_mu - y[i] * inv_mu2)
hessian_rows_out[i] = 2 * weights[i] * (inv_mu - 2 * y[i] * inv_mu2)
gradient_rows_out[i] = weights[i] * (y[i] * inv_mu2 - inv_mu)
# Use the FIM instead of the true Hessian, as the latter is not
# necessarily positive definite.
hessian_rows_out[i] = weights[i] * inv_mu

def inv_gaussian_log_likelihood(
const_floating1d y,
Expand Down
2 changes: 2 additions & 0 deletions tests/glm/test_distribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ def test_deviance_zero(family, chk_values):
(InverseGaussianDistribution(), LogLink()),
(TweedieDistribution(power=1.5), LogLink()),
(TweedieDistribution(power=2.5), LogLink()),
(TweedieDistribution(power=3), LogLink()),
(BinomialDistribution(), LogitLink()),
(TweedieDistribution(power=1.5), TweedieLink(1.5)),
(TweedieDistribution(power=2.5), TweedieLink(2.5)),
Expand Down Expand Up @@ -228,6 +229,7 @@ def f(coef2):
(GammaDistribution(), LogLink(), True),
(InverseGaussianDistribution(), LogLink(), False),
(TweedieDistribution(power=1.5), LogLink(), True),
(TweedieDistribution(power=3), LogLink(), False),
(TweedieDistribution(power=4.5), LogLink(), False),
(NegativeBinomialDistribution(theta=1.0), LogLink(), False),
],
Expand Down
74 changes: 74 additions & 0 deletions tests/glm/test_glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1270,6 +1270,80 @@ def test_binomial_cloglog_unregularized(solver):
assert_allclose(glum_glm.coef_, sm_fit.params[1:], rtol=2e-5)


def test_inv_gaussian_log_enet():
    """Test elastic net regression with inverse gaussian family and log link.

    Compare the fitted coefficients against reference values obtained from
    R's glmnet (see the R snippet below).
    """
    # R code used to generate the reference values:
    # library(glmnet)
    # options(digits=10)
    # df <- data.frame(a=c(1,2,3,4,5,6), b=c(0,0,0,0,1,1), y=c(.2,.5,.8,.3,.9,.9))
    # x <- data.matrix(df[,c("a", "b")])
    # y <- df$y
    # fit <- glmnet(
    #     x=x, y=y, lambda=1, alpha=0.5, intercept=TRUE,
    #     family=inv.gaussian(link=log), standardize=FALSE, thresh=1e-10
    # )
    # coef(fit)
    #                       s0
    # (Intercept) -1.028655076
    # a            0.123000467
    # b            .
    glmnet_intercept = -1.028655076
    glmnet_coef = [0.123000467, 0.0]
    X = np.array([[1, 2, 3, 4, 5, 6], [0, 0, 0, 0, 1, 1]], dtype="float").T
    y = np.array([0.2, 0.5, 0.8, 0.3, 0.9, 0.9])
    rng = np.random.RandomState(42)
    glm = GeneralizedLinearRegressor(
        alpha=1,
        l1_ratio=0.5,
        family="inverse.gaussian",
        link="log",
        solver="irls-cd",
        gradient_tol=1e-8,
        selection="random",
        random_state=rng,
    )
    glm.fit(X, y)
    assert_allclose(glm.intercept_, glmnet_intercept, rtol=1e-3)
    assert_allclose(glm.coef_, glmnet_coef, rtol=1e-3)

    # Same fit with selection='cyclic' (deterministic coordinate order),
    # checked at the same reduced precision.
    glm = GeneralizedLinearRegressor(
        alpha=1,
        l1_ratio=0.5,
        family="inverse.gaussian",
        link="log",
        solver="irls-cd",
        gradient_tol=1e-8,
        selection="cyclic",
    )
    glm.fit(X, y)
    assert_allclose(glm.intercept_, glmnet_intercept, rtol=1e-3)
    assert_allclose(glm.coef_, glmnet_coef, rtol=1e-3)

    # Check warm_start: first fit with a different alpha ...
    glm = GeneralizedLinearRegressor(
        alpha=0.005,
        l1_ratio=0.5,
        family="inverse.gaussian",
        max_iter=300,
        link="log",
        solver="irls-cd",
        gradient_tol=1e-8,
        selection="cyclic",
    )
    glm.fit(X, y)
    # ... then warm-start at the original alpha, also exercising sparse input.
    glm.warm_start = True
    glm.alpha = 1
    X = sparse.csr_matrix(X)
    glm.fit(X, y)
    assert_allclose(glm.intercept_, glmnet_intercept, rtol=1e-3)
    assert_allclose(glm.coef_, glmnet_coef, rtol=1e-3)


@pytest.mark.parametrize("alpha", [0.01, 0.1, 1, 10])
def test_binomial_enet(alpha):
"""Test elastic net regression with binomial family and LogitLink.
Expand Down

0 comments on commit 3e483b9

Please sign in to comment.