implement all unmirrored distributions from Stan's R code, with testing for lack of support for all distributions; weibull, lognormal, and beta currently do not fully work
mbi6245 committed Jul 23, 2024
1 parent 10e1011 commit 00e6fb7
Showing 2 changed files with 118 additions and 12 deletions.
80 changes: 74 additions & 6 deletions src/ensemble/distributions.py
@@ -3,6 +3,9 @@
import numpy as np
import scipy.optimize
import scipy.stats
from scipy.special import gamma as gamma_func

# from scipy.special import gammainccinv, gammaincinv


# distribution parent class to abstract away the different scipy functions
@@ -29,19 +32,22 @@ def stats(self, moments):

class Exponential(Distribution):
def _create_scipy_dist(self) -> None:
positive_support(self.mean)
lambda_ = 1 / self.mean
self._scipy_dist = scipy.stats.expon(scale=1 / lambda_)
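# the exponential has a single parameter, so only the mean is matched; the
# implied variance is mean**2 (the tests check exp_var = MEAN**2 accordingly)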


class Gamma(Distribution):
def _create_scipy_dist(self) -> None:
strict_positive_support(self.mean)
alpha = self.mean**2 / self.variance
beta = self.mean / self.variance
self._scipy_dist = scipy.stats.gamma(a=alpha, scale=1 / beta)
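

# worked example: mean=2, variance=8 gives alpha = 0.5, beta = 0.25, and
# scipy.stats.gamma(a=0.5, scale=4.0).stats(moments="mv") returns (2.0, 8.0)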


class InvGamma(Distribution):
def _create_scipy_dist(self) -> None:
strict_positive_support(self.mean)
optim_params = scipy.optimize.minimize(
fun=self._shape_scale,
# a *good* friend told me that this is a good initial guess and it works so far???
@@ -63,16 +69,17 @@ def _shape_scale(self, x, samp_mean, samp_var) -> None:

class Fisk(Distribution):
def _create_scipy_dist(self):
positive_support(self.mean)
optim_params = scipy.optimize.minimize(
fun=self._shape_scale,
# start beta at 1.1 and solve for alpha
x0=[self.mean * 1.1 * np.sin(np.pi / 1.1) / np.pi, 1.1],
args=(self.mean, self.variance),
# options={"disp": True},
)
alpha, beta = np.abs(optim_params.x)
# parameterization notes: scipy's c is wikipedia's beta, scipy's scale is wikipedia's alpha
self._scipy_dist = scipy.stats.fisk(c=beta, scale=alpha)
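# x0 derivation: the closed-form mean of the log-logistic is
# mean = alpha * (pi / beta) / sin(pi / beta) for beta > 1; fixing beta = 1.1
# and solving for alpha gives alpha = mean * 1.1 * sin(pi / 1.1) / pi, i.e. x0[0]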

def _shape_scale(self, x, samp_mean, samp_var) -> None:
alpha = x[0]
@@ -94,12 +101,36 @@ def _create_scipy_dist(self) -> None:

class Weibull(Distribution):
def _create_scipy_dist(self) -> None:
positive_support(self.mean)
optim_params = scipy.optimize.minimize(
fun=self._shape_scale,
# ideally can invert gamma function for k, then use mean / sd as a guess for lambda (see the sketch after this class)
x0=[self.mean / gamma_func(1 + 1 / 1.5), 1.5],
args=(self.mean, self.variance),
options={"disp": True},
)
lambda_, k = np.abs(optim_params.x)
print("params from optim: ", lambda_, k)
self._scipy_dist = scipy.stats.weibull_min(c=k, scale=lambda_)

def _shape_scale(self, x, samp_mean, samp_var) -> None:
lambda_ = x[0]
k = x[1]
mean_guess = lambda_ * gamma_func(1 + (1 / k))
# Var(X) = lambda**2 * (Gamma(1 + 2/k) - Gamma(1 + 1/k)**2)
variance_guess = lambda_**2 * (
    gamma_func(1 + (2 / k)) - gamma_func(1 + (1 / k)) ** 2
)
return (mean_guess - samp_mean) ** 2 + (variance_guess - samp_var) ** 2
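

# a sketch of the inversion idea mentioned in Weibull above (the helper name
# and the bracket [0.1, 50] are assumptions, not part of this commit): solve
# var / mean**2 == Gamma(1 + 2/k) / Gamma(1 + 1/k)**2 - 1 for k with a
# bracketing root finder, then back out lambda from the mean
def _weibull_moments_guess(samp_mean, samp_var):
    def cv_gap(k):
        # implied minus observed squared coefficient of variation
        return (
            gamma_func(1 + 2 / k) / gamma_func(1 + 1 / k) ** 2
            - 1
            - samp_var / samp_mean**2
        )

    # assumed bracket; cv_gap decreases in k, so one sign change is enough
    k = scipy.optimize.brentq(cv_gap, 0.1, 50)
    lambda_ = samp_mean / gamma_func(1 + 1 / k)
    return lambda_, k
# usage sketch: x0=list(_weibull_moments_guess(self.mean, self.variance))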


class LogNormal(Distribution):
def _create_scipy_dist(self) -> None:
# method of moments in log space: mu and sigma are the mean and SD of log(X)
mu = np.log(self.mean / np.sqrt(1 + (self.variance / self.mean**2)))
sigma = np.sqrt(np.log(1 + (self.variance / self.mean**2)))
# scipy's lognorm takes s=sigma and scale=exp(mu); passing mu as loc= shifts
# the support rather than setting the log-mean
self._scipy_dist = scipy.stats.lognorm(s=sigma, scale=np.exp(mu))
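# worked check: mean=2, variance=8 gives mu = log(2 / sqrt(3)) and
# sigma = sqrt(log(3)), and lognorm(s=sigma, scale=np.exp(mu)).stats(moments="mv")
# returns (2.0, 8.0) exactly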


class Normal(Distribution):
@@ -111,10 +142,47 @@ def _create_scipy_dist(self) -> None:

class Beta(Distribution):
def _create_scipy_dist(self) -> None:
beta_bounds(self.mean)
optim_params = scipy.optimize.minimize(
fun=self._shape_scale,
# trying something similar to invgamma, unsuccessful for variance (see the closed-form sketch after this class)
x0=[2, self.mean * 2 - 2],
args=(self.mean, self.variance),
options={"disp": True},
)
alpha, beta = np.abs(optim_params.x)
print("params from optim: ", alpha, beta)
self._scipy_dist = scipy.stats.beta(a=alpha, b=beta)

def _shape_scale(self, x, samp_mean, samp_var):
alpha = x[0]
beta = x[1]
mean_guess = alpha / (alpha + beta)
variance_guess = (
alpha * beta / ((alpha + beta) ** 2 * (alpha + beta + 1))
)
return (mean_guess - samp_mean) ** 2 + (variance_guess - samp_var) ** 2
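

# a sketch (the helper name is an assumption, not part of this commit): the
# beta has a closed-form method-of-moments solution that could seed or replace
# the optimizer above whenever the variance is feasible, i.e.
# samp_var < samp_mean * (1 - samp_mean)
def _beta_moments_guess(samp_mean, samp_var):
    # nu = alpha + beta, the implied concentration of the beta
    nu = samp_mean * (1 - samp_mean) / samp_var - 1
    return samp_mean * nu, (1 - samp_mean) * nu  # (alpha, beta)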


# exp, gamma, invgamma, llogis, gumbel, weibull, lognormal, normal, mgamma, mgumbel, beta


# distribution_dict = {"exponential": Exponential()}


### HELPER FUNCTIONS
# the following checks are a crude guard: a negative mean certainly implies
# some negative data, but data can contain negative values and still have a
# positive mean, so these checks are necessary rather than sufficient
def positive_support(mean):
if mean < 0:
raise ValueError("This distribution is only supported on [0, np.inf)")


def strict_positive_support(mean):
if mean <= 0:
raise ValueError("This distribution is only supported on (0, np.inf)")


def beta_bounds(mean):
if (mean < 0) or (mean > 1):
raise ValueError("This distribution is only supported on [0, 1]")
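

# sketch of a variance guard (hypothetical helper, not wired into Beta): the
# beta variance is bounded above by mean * (1 - mean), so an infeasible
# variance could be rejected up front alongside beta_bounds
def beta_variance_feasible(mean, variance):
    return variance < mean * (1 - mean)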
50 changes: 44 additions & 6 deletions tests/test_distributions.py
@@ -19,12 +19,14 @@
# @pytest.mark.parametrize("a, b, expected", [(1, 2, 3), (2, 3, 5)])
# def test_add(a, b, expected):
# assert add(a, b) == expected
NEG_MEAN = -2
BETA_MEAN = 0.5
BETA_VARIANCE = 0.2
MEAN = 2
VARIANCE = 8


def test_exp():
exp = Exponential(MEAN, VARIANCE)
res = exp.stats(moments="mv")
exp_var = MEAN**2
@@ -49,8 +51,6 @@ def test_invgamma():
def test_fisk():
fisk = Fisk(MEAN, VARIANCE)
res = fisk.stats(moments="mv")
print("resulting mean and var: ", res)
# assert False
assert np.isclose(res[0], MEAN)
assert np.isclose(res[1], VARIANCE)

@@ -61,13 +61,26 @@ def test_gumbel():
assert np.isclose(res[0], MEAN)
assert np.isclose(res[1], VARIANCE)

gumbel = GumbelR(NEG_MEAN, VARIANCE)
res = gumbel.stats(moments="mv")
assert np.isclose(res[0], NEG_MEAN)
assert np.isclose(res[1], VARIANCE)


def test_weibull():
weibull = Weibull(MEAN, VARIANCE)
res = weibull.stats(moments="mv")
print("resulting mean and var: ", res)
assert np.isclose(res[0], MEAN)
assert np.isclose(res[1], VARIANCE)


def test_lognormal():
lognormal = LogNormal(MEAN, VARIANCE)
res = lognormal.stats(moments="mv")
print("resulting mean and var: ", res)
assert np.isclose(res[0], MEAN)
assert np.isclose(res[1], VARIANCE)


def test_normal():
@@ -76,6 +89,31 @@ def test_normal():
assert np.isclose(res[0], MEAN)
assert np.isclose(res[1], VARIANCE)

norm = Normal(NEG_MEAN, VARIANCE)
res = norm.stats(moments="mv")
assert np.isclose(res[0], NEG_MEAN)
assert np.isclose(res[1], VARIANCE)


def test_beta():
# a beta with mean 0.5 can have variance at most 0.25, so use the feasible
# BETA_VARIANCE rather than VARIANCE
beta = Beta(BETA_MEAN, BETA_VARIANCE)
res = beta.stats(moments="mv")
print("resulting mean and var: ", res)
assert np.isclose(res[0], BETA_MEAN)
assert np.isclose(res[1], BETA_VARIANCE)


def test_diff_supports():
# negative means for only positive RVs
with pytest.raises(ValueError):
Exponential(NEG_MEAN, VARIANCE)
with pytest.raises(ValueError):
Gamma(NEG_MEAN, VARIANCE)
with pytest.raises(ValueError):
InvGamma(NEG_MEAN, VARIANCE)
with pytest.raises(ValueError):
Fisk(NEG_MEAN, VARIANCE)

# mean outside of 0 and 1 for Beta
with pytest.raises(ValueError):
Beta(NEG_MEAN, VARIANCE)
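

# sketch (hypothetical, depends on Beta enforcing the variance bound
# variance < mean * (1 - mean), which is 0.25 for BETA_MEAN = 0.5):
# def test_beta_variance_bound():
#     with pytest.raises(ValueError):
#         Beta(BETA_MEAN, 0.3)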
