Skip to content

Commit

Permalink
Add combine_roulette function (#555)
Browse files Browse the repository at this point in the history
* Add combine_roulette function

* check weights are positive
  • Loading branch information
aloctavodia authored Oct 3, 2024
1 parent 5578b81 commit c556b46
Show file tree
Hide file tree
Showing 5 changed files with 177 additions and 24 deletions.
14 changes: 7 additions & 7 deletions preliz/internal/optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,18 +77,18 @@ def func(params, dist, x_vals):
return opt


def optimize_cdf(dist, x_vals, ecdf, none_idx, fixed):
def func(params, dist, x_vals, ecdf):
def optimize_pdf(dist, x_vals, epdf, none_idx, fixed):
def func(params, dist, x_vals, epdf):
params = get_params(dist, params, none_idx, fixed)
dist._parametrization(**params)
loss = dist.cdf(x_vals) - ecdf
loss = dist.pdf(x_vals) - epdf
return loss

init_vals = np.array(dist.params)[none_idx]
bounds = np.array(dist.params_support)[none_idx]
bounds = list(zip(*bounds))

opt = least_squares(func, x0=init_vals, args=(dist, x_vals, ecdf), bounds=bounds)
opt = least_squares(func, x0=init_vals, args=(dist, x_vals, epdf), bounds=bounds)
params = get_params(dist, opt["x"], none_idx, fixed)
dist._parametrization(**params)
loss = opt["cost"]
Expand Down Expand Up @@ -305,9 +305,9 @@ def get_distributions(dist_names):
return dists


def fit_to_ecdf(selected_distributions, x_vals, ecdf, mean, std, x_min, x_max, extra_pros):
def fit_to_epdf(selected_distributions, x_vals, epdf, mean, std, x_min, x_max, extra_pros):
"""
Minimize the difference between the cdf and the ecdf over a grid of values
Minimize the difference between the pdf and the epdf over a grid of values
defined by x_min and x_max
Note: This function is intended to be used with pz.roulette
Expand All @@ -325,7 +325,7 @@ def fit_to_ecdf(selected_distributions, x_vals, ecdf, mean, std, x_min, x_max, e
if dist._check_endpoints(x_min, x_max, raise_error=False):
none_idx, fixed = get_fixed_params(dist)
dist._fit_moments(mean, std) # pylint:disable=protected-access
loss = optimize_cdf(dist, x_vals, ecdf, none_idx, fixed)
loss = optimize_pdf(dist, x_vals, epdf, none_idx, fixed)

fitted.update(loss, dist)

Expand Down
49 changes: 49 additions & 0 deletions preliz/tests/test_combine_roulette.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import pytest
from numpy.testing import assert_almost_equal
from preliz import combine_roulette
from preliz.distributions import BetaScaled, LogNormal, StudentT

# Each response mirrors the ``inputs`` tuple of a Roulette object:
# (x values, empirical pdf, total chips, x_min, x_max, nrows, ncols).
_GRID = (0, 10, 10, 11)  # x_min, x_max, nrows, ncols shared by response0, response1, response2

response0 = (
    [1.5, 2.5, 3.5],
    [0.32142857142857145, 0.35714285714285715, 0.32142857142857145],
    28,
    *_GRID,
)
response1 = (
    [7.5, 8.5, 9.5],
    [0.32142857142857145, 0.35714285714285715, 0.32142857142857145],
    28,
    *_GRID,
)
response2 = ([9.5], [1], 10, *_GRID)
# Same chips as response2, but with a different number of columns (14 instead of 11).
response3 = ([9.5], [1], 10, 0, 10, 10, 14)


@pytest.mark.parametrize(
    "responses, weights, dist_names, params, result",
    [
        ([response0, response1], [0.5, 0.5], None, None, BetaScaled(1.2, 1, 0, 10)),
        (
            [response0, response1],
            [0.5, 0.5],
            ["Beta", "StudentT"],
            "TruncatedNormal(lower=0), StudentT(nu=1000)",
            StudentT(1000, 5.5, 3.1),
        ),
        ([response0, response2], [1, 1], None, None, LogNormal(1.1, 0.6)),
    ],
)
def test_combine_roulette(responses, weights, dist_names, params, result):
    """Compare the parameters of the combined fit with the expected ones to one decimal."""
    fitted = combine_roulette(
        responses,
        weights=weights,
        dist_names=dist_names,
        params=params,
    )
    assert_almost_equal(fitted.params, result.params, decimal=1)


def test_combine_roulette_error():
    """Combining responses whose grid descriptions differ must raise a ValueError."""
    # response3 has 14 columns while response0 has 11.
    mismatched_grids = [response0, response3]
    with pytest.raises(ValueError):
        combine_roulette(mismatched_grids)
3 changes: 2 additions & 1 deletion preliz/unidimensional/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from .beta_mode import beta_mode
from .combine_roulette import combine_roulette
from .maxent import maxent
from .mle import mle
from .quartile import quartile
from .quartile_int import quartile_int
from .roulette import Roulette

__all__ = ["beta_mode", "maxent", "mle", "Roulette", "quartile", "quartile_int"]
__all__ = ["beta_mode", "combine_roulette", "maxent", "mle", "Roulette", "quartile", "quartile_int"]
91 changes: 91 additions & 0 deletions preliz/unidimensional/combine_roulette.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import numpy as np

from preliz.internal.distribution_helper import process_extra
from preliz.internal.optimization import fit_to_epdf, get_distributions


def combine_roulette(responses, weights=None, dist_names=None, params=None):
    """
    Combine multiple elicited distributions into a single distribution.

    Parameters
    ----------
    responses : list of tuples
        Typically, each tuple comes from the ``.inputs`` attribute of a ``Roulette`` object and
        represents a single elicited distribution. The elements read here are, by position:
        the x values, the empirical pdf, the total number of chips and then the grid
        description (everything from index 3 onwards, starting with x_min and x_max).
    weights : array-like, optional
        Weights for each elicited distribution. Defaults to None, i.e. equal weights.
        There must be exactly one strictly positive weight per response.
        The sum of the weights must be equal to 1, otherwise it will be normalized.
    dist_names: list
        List of distribution names to be used in the elicitation.
        Defaults to ["Normal", "BetaScaled", "Gamma", "LogNormal", "StudentT"].
    params : str, optional
        Extra parameters to be passed to the distributions. The format is a string with the
        PreliZ's distribution name followed by the argument to fix.
        For example: "TruncatedNormal(lower=0), StudentT(nu=8)".

    Returns
    -------
    PreliZ distribution

    Raises
    ------
    ValueError
        If ``responses`` is empty, if the number of weights differs from the number of
        responses, if any weight is not strictly positive, or if the responses were not
        elicited on the same grid.
    """
    n_responses = len(responses)
    if n_responses == 0:
        raise ValueError("At least one response is needed.")

    extra_pros = process_extra(params) if params is not None else []

    if weights is None:
        weights = np.full(n_responses, 1 / n_responses)
    else:
        weights = np.array(weights, dtype=float)

        # zip() below would silently drop responses (or weights) on a length mismatch.
        if weights.shape != (n_responses,):
            raise ValueError("The number of weights must match the number of responses.")

        # Written as "not all > 0" so that NaN weights are rejected as well.
        if not np.all(weights > 0):
            raise ValueError("The weights must be positive.")

        weights /= weights.sum()

    reference_grid = responses[0][3:]
    if not all(records[3:] == reference_grid for records in responses):
        raise ValueError(
            "To combine single elicitation instances, the grid should be the same for all of them."
        )

    if dist_names is None:
        dist_names = ["Normal", "BetaScaled", "Gamma", "LogNormal", "StudentT"]

    # Accumulate, for every x value, the weighted number of chips over all responses.
    new_pdf = {}
    for records, weight in zip(responses, weights):
        chips = records[2]
        for x_i, pdf_i in zip(records[0], records[1]):
            contribution = pdf_i * weight * chips
            if x_i in new_pdf:
                new_pdf[x_i] += contribution
            else:
                new_pdf[x_i] = contribution

    # Normalize so the combined empirical pdf sums to one, then compute its first two moments.
    total = sum(new_pdf.values())
    new_pdf = {x_i: mass / total for x_i, mass in new_pdf.items()}
    mean = sum(x_i * prob for x_i, prob in new_pdf.items())
    var = sum(prob * (x_i - mean) ** 2 for x_i, prob in new_pdf.items())
    std = var**0.5

    # The grid check above ensures all the responses share the same x_min and x_max.
    x_min = responses[0][3]
    x_max = responses[0][4]

    fitted_dist = fit_to_epdf(
        get_distributions(dist_names),
        list(new_pdf.keys()),
        list(new_pdf.values()),
        mean,
        std,
        x_min,
        x_max,
        extra_pros,
    )

    return fitted_dist
44 changes: 28 additions & 16 deletions preliz/unidimensional/roulette.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
except ImportError:
pass

from ..internal.optimization import fit_to_ecdf, get_distributions
from ..internal.optimization import fit_to_epdf, get_distributions
from ..internal.plot_helper import check_inside_notebook, representations
from ..internal.distribution_helper import process_extra
from ..distributions import all_discrete, all_continuous
Expand All @@ -36,9 +36,10 @@ def __init__(
Number of columns for the grid. Defaults to 11.
dist_names: list
List of distribution names to be used in the elicitation.
For example: ["Normal", "StudentT"].
Default to None, almost all 1D distributions available in PreliZ will be used,
with some exceptions like Uniform or Cauchy.
Defaults to None, in which case the pre-selected distributions are ["Normal", "BetaScaled",
"Gamma", "LogNormal", "StudentT"] but almost all 1D PreliZ's distributions
are available to be selected from the menu with some exceptions like Uniform
or Cauchy.
params: Optional[str]:
Extra parameters to be passed to the distributions. The format is a string with the
PreliZ's distribution name followed by the argument to fix.
Expand All @@ -49,7 +50,11 @@ def __init__(
Returns
-------
PreliZ distribution
Roulette object
The object has many attributes, but the most important are:
- dist: The fitted distribution
- inputs: A tuple with the x values, the empirical pdf, the total
chips, the x_min, the x_max, the number of rows and the number of columns.
References
----------
Expand All @@ -65,7 +70,7 @@ def __init__(
self._figsize = figsize
self._w_extra = params
self.dist = None
self._hist = None
self.inputs = None

check_inside_notebook(need_widget=True)

Expand Down Expand Up @@ -151,18 +156,18 @@ def _create_grid(self):
def _on_leave_fig(self):
extra_pros = process_extra(self._widgets["w_extra"].value)

x_vals, ecdf, probs, mean, std, filled_columns = self._weights_to_ecdf()
x_vals, epdf, mean, std, filled_columns = self._weights_to_pdf()

fitted_dist = None
if filled_columns > 1:
selected_distributions = get_distributions(self._widgets["w_distributions"].value)

if selected_distributions:
self._reset_dist_panel(yticks=False)
fitted_dist = fit_to_ecdf(
fitted_dist = fit_to_epdf(
selected_distributions,
x_vals,
ecdf,
epdf,
mean,
std,
self._x_min,
Expand All @@ -178,20 +183,27 @@ def _on_leave_fig(self):
self._reset_dist_panel(yticks=True)
self._fig.canvas.draw()

self.hist = (x_vals, probs)
self.inputs = (
x_vals,
epdf,
sum(self._grid._weights.values()),
self._x_min,
self._x_max,
self._nrows,
self._ncols,
)
self.dist = fitted_dist

def _weights_to_ecdf(self):
def _weights_to_pdf(self):
step = (self._x_max - self._x_min) / (self._ncols - 1)
x_vals = [(k + 0.5) * step + self._x_min for k, v in self._grid._weights.items() if v != 0]
total = sum(self._grid._weights.values())
probabilities = [v / total for v in self._grid._weights.values() if v != 0]
cum_sum = np.cumsum(probabilities)
epdf = [v / total for v in self._grid._weights.values() if v != 0]

mean = sum(value * prob for value, prob in zip(x_vals, probabilities))
std = (sum(prob * (value - mean) ** 2 for value, prob in zip(x_vals, probabilities))) ** 0.5
mean = sum(prob * value for value, prob in zip(x_vals, epdf))
std = (sum(prob * (value - mean) ** 2 for value, prob in zip(x_vals, epdf))) ** 0.5

return x_vals, cum_sum, probabilities, mean, std, len(x_vals)
return x_vals, epdf, mean, std, len(x_vals)

def _update_grid(self):
self._ax_grid.cla()
Expand Down

0 comments on commit c556b46

Please sign in to comment.