Skip to content

Commit c556b46

Browse files
authored
Add combine_roulette function (#555)
* Add combine_roulette function * check weights are positive
1 parent 5578b81 commit c556b46

File tree

5 files changed

+177
-24
lines changed

5 files changed

+177
-24
lines changed

preliz/internal/optimization.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -77,18 +77,18 @@ def func(params, dist, x_vals):
7777
return opt
7878

7979

80-
def optimize_cdf(dist, x_vals, ecdf, none_idx, fixed):
81-
def func(params, dist, x_vals, ecdf):
80+
def optimize_pdf(dist, x_vals, epdf, none_idx, fixed):
81+
def func(params, dist, x_vals, epdf):
8282
params = get_params(dist, params, none_idx, fixed)
8383
dist._parametrization(**params)
84-
loss = dist.cdf(x_vals) - ecdf
84+
loss = dist.pdf(x_vals) - epdf
8585
return loss
8686

8787
init_vals = np.array(dist.params)[none_idx]
8888
bounds = np.array(dist.params_support)[none_idx]
8989
bounds = list(zip(*bounds))
9090

91-
opt = least_squares(func, x0=init_vals, args=(dist, x_vals, ecdf), bounds=bounds)
91+
opt = least_squares(func, x0=init_vals, args=(dist, x_vals, epdf), bounds=bounds)
9292
params = get_params(dist, opt["x"], none_idx, fixed)
9393
dist._parametrization(**params)
9494
loss = opt["cost"]
@@ -305,9 +305,9 @@ def get_distributions(dist_names):
305305
return dists
306306

307307

308-
def fit_to_ecdf(selected_distributions, x_vals, ecdf, mean, std, x_min, x_max, extra_pros):
308+
def fit_to_epdf(selected_distributions, x_vals, epdf, mean, std, x_min, x_max, extra_pros):
309309
"""
310-
Minimize the difference between the cdf and the ecdf over a grid of values
310+
Minimize the difference between the pdf and the epdf over a grid of values
311311
defined by x_min and x_max
312312
313313
Note: This function is intended to be used with pz.roulette
@@ -325,7 +325,7 @@ def fit_to_ecdf(selected_distributions, x_vals, ecdf, mean, std, x_min, x_max, e
325325
if dist._check_endpoints(x_min, x_max, raise_error=False):
326326
none_idx, fixed = get_fixed_params(dist)
327327
dist._fit_moments(mean, std) # pylint:disable=protected-access
328-
loss = optimize_cdf(dist, x_vals, ecdf, none_idx, fixed)
328+
loss = optimize_pdf(dist, x_vals, epdf, none_idx, fixed)
329329

330330
fitted.update(loss, dist)
331331

preliz/tests/test_combine_roulette.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import pytest
2+
from numpy.testing import assert_almost_equal
3+
from preliz import combine_roulette
4+
from preliz.distributions import BetaScaled, LogNormal, StudentT
5+
6+
# Each response mirrors the layout of ``Roulette.inputs``:
# (x values, empirical pdf, total chips, x_min, x_max, nrows, ncols).
_GRID = (0, 10, 10, 11)
_THREE_BIN_EPDF = (0.32142857142857145, 0.35714285714285715, 0.32142857142857145)

# Two three-bin responses on the same grid, centred on different regions.
response0 = ([1.5, 2.5, 3.5], list(_THREE_BIN_EPDF), 28, *_GRID)
response1 = ([7.5, 8.5, 9.5], list(_THREE_BIN_EPDF), 28, *_GRID)

# Single-bin response on the same grid as response0/response1.
response2 = ([9.5], [1], 10, *_GRID)

# Same as response2 but with a different number of columns (14 instead of 11).
response3 = ([9.5], [1], 10, 0, 10, 10, 14)
26+
27+
28+
# (responses, weights, dist_names, params, expected distribution)
_COMBINE_CASES = [
    # Explicit equal weights, default candidate distributions.
    ([response0, response1], [0.5, 0.5], None, None, BetaScaled(1.2, 1, 0, 10)),
    # Restricted candidate list plus extra fixed parameters.
    (
        [response0, response1],
        [0.5, 0.5],
        ["Beta", "StudentT"],
        "TruncatedNormal(lower=0), StudentT(nu=1000)",
        StudentT(1000, 5.5, 3.1),
    ),
    # Weights that do not sum to one.
    ([response0, response2], [1, 1], None, None, LogNormal(1.1, 0.6)),
]


@pytest.mark.parametrize("responses, weights, dist_names, params, result", _COMBINE_CASES)
def test_combine_roulette(responses, weights, dist_names, params, result):
    """Compare the parameters of the combined fit with the reference, to one decimal."""
    fitted = combine_roulette(responses, weights, dist_names, params)
    assert_almost_equal(fitted.params, result.params, decimal=1)
45+
46+
47+
def test_combine_roulette_error():
    """Responses whose grid entries differ must be rejected with a ValueError."""
    mismatched_grids = [response0, response3]
    with pytest.raises(ValueError):
        combine_roulette(mismatched_grids)

preliz/unidimensional/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from .beta_mode import beta_mode
2+
from .combine_roulette import combine_roulette
23
from .maxent import maxent
34
from .mle import mle
45
from .quartile import quartile
56
from .quartile_int import quartile_int
67
from .roulette import Roulette
78

8-
__all__ = ["beta_mode", "maxent", "mle", "Roulette", "quartile", "quartile_int"]
9+
__all__ = ["beta_mode", "combine_roulette", "maxent", "mle", "Roulette", "quartile", "quartile_int"]
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import numpy as np
2+
3+
from preliz.internal.distribution_helper import process_extra
4+
from preliz.internal.optimization import fit_to_epdf, get_distributions
5+
6+
7+
def combine_roulette(responses, weights=None, dist_names=None, params=None):
    """
    Combine multiple elicited distributions into a single distribution.

    The empirical pdfs of the individual responses are pooled into one empirical pdf,
    where each response contributes proportionally to its weight and to its number of
    chips. The candidate distributions are then fitted to the pooled pdf.

    Parameters
    ----------
    responses : list of tuples
        Typically, each tuple comes from the ``.inputs`` attribute of a ``Roulette`` object and
        represents a single elicited distribution. Each tuple holds, in order: the x values,
        the empirical pdf, the total number of chips, ``x_min``, ``x_max``, the number of rows
        and the number of columns. All responses must share the same grid, i.e. everything
        from ``x_min`` onwards must be equal.
    weights : array-like, optional
        Weights for each elicited distribution, one per response. Defaults to None,
        i.e. equal weights. The weights must be positive; if they do not sum to 1 they
        are normalized.
    dist_names : list, optional
        List of distribution names to be used in the elicitation.
        Defaults to ["Normal", "BetaScaled", "Gamma", "LogNormal", "StudentT"].
    params : str, optional
        Extra parameters to be passed to the distributions. The format is a string with the
        PreliZ's distribution name followed by the argument to fix.
        For example: "TruncatedNormal(lower=0), StudentT(nu=8)".

    Returns
    -------
    PreliZ distribution

    Raises
    ------
    ValueError
        If ``responses`` is empty, if the number of weights differs from the number of
        responses, if any weight is not positive (NaN included), or if the responses
        were not elicited on the same grid.
    """
    n_responses = len(responses)
    if n_responses == 0:
        raise ValueError("At least one response is needed.")

    extra_pros = [] if params is None else process_extra(params)

    if weights is None:
        weights = np.full(n_responses, 1 / n_responses)
    else:
        weights = np.array(weights, dtype=float)

        # Without this check ``zip`` below would silently drop the surplus
        # responses (or weights) instead of reporting the mismatch.
        if weights.shape != (n_responses,):
            raise ValueError("The number of weights must match the number of responses.")

        # ``not all(> 0)`` rather than ``any(<= 0)`` so that NaN is rejected too.
        if not np.all(weights > 0):
            raise ValueError("The weights must be positive.")

        weights /= weights.sum()

    grid = responses[0][3:]
    if not all(records[3:] == grid for records in responses):
        raise ValueError(
            "To combine single elicitation instances, the grid should be the same for all of them."
        )

    if dist_names is None:
        dist_names = ["Normal", "BetaScaled", "Gamma", "LogNormal", "StudentT"]

    # Pool the responses: mass at each x value is accumulated across responses,
    # scaled by the response weight and its number of chips.
    new_pdf = {}
    for records, weight in zip(responses, weights):
        chips = records[2]
        for x_i, pdf_i in zip(records[0], records[1]):
            new_pdf[x_i] = new_pdf.get(x_i, 0) + pdf_i * weight * chips

    # Renormalize so that the pooled pdf sums to one.
    total = sum(new_pdf.values())
    new_pdf = {x_i: pdf_i / total for x_i, pdf_i in new_pdf.items()}

    # First two moments of the pooled pdf, passed on to the fitting routine.
    mean = sum(x_i * pdf_i for x_i, pdf_i in new_pdf.items())
    var = sum(pdf_i * (x_i - mean) ** 2 for x_i, pdf_i in new_pdf.items())
    std = var**0.5

    # The grid check above guarantees every response shares these limits.
    x_min = responses[0][3]
    x_max = responses[0][4]

    fitted_dist = fit_to_epdf(
        get_distributions(dist_names),
        list(new_pdf.keys()),
        list(new_pdf.values()),
        mean,
        std,
        x_min,
        x_max,
        extra_pros,
    )

    return fitted_dist

preliz/unidimensional/roulette.py

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
except ImportError:
1010
pass
1111

12-
from ..internal.optimization import fit_to_ecdf, get_distributions
12+
from ..internal.optimization import fit_to_epdf, get_distributions
1313
from ..internal.plot_helper import check_inside_notebook, representations
1414
from ..internal.distribution_helper import process_extra
1515
from ..distributions import all_discrete, all_continuous
@@ -36,9 +36,10 @@ def __init__(
3636
Number of columns for the grid. Defaults to 11.
3737
dist_names: list
3838
List of distributions names to be used in the elicitation.
39-
For example: ["Normal", "StudentT"].
40-
Default to None, almost all 1D distributions available in PreliZ will be used,
41-
with some exceptions like Uniform or Cauchy.
39+
Defaults to None, in which case the pre-selected distributions are ["Normal", "BetaScaled",
40+
"Gamma", "LogNormal", "StudentT"] but almost all 1D PreliZ's distributions
41+
are available to be selected from the menu with some exceptions like Uniform
42+
or Cauchy.
4243
params: Optional[str]:
4344
Extra parameters to be passed to the distributions. The format is a string with the
4445
PreliZ's distribution name followed by the argument to fix.
@@ -49,7 +50,11 @@ def __init__(
4950
5051
Returns
5152
-------
52-
PreliZ distribution
53+
Roulette object
54+
The object has many attributes, but the most important are:
55+
- dist: The fitted distribution
56+
- inputs: A tuple with the x values, the empirical pdf, the total
57+
chips, the x_min, the x_max, the number of rows and the number of columns.
5358
5459
References
5560
----------
@@ -65,7 +70,7 @@ def __init__(
6570
self._figsize = figsize
6671
self._w_extra = params
6772
self.dist = None
68-
self._hist = None
73+
self.inputs = None
6974

7075
check_inside_notebook(need_widget=True)
7176

@@ -151,18 +156,18 @@ def _create_grid(self):
151156
def _on_leave_fig(self):
152157
extra_pros = process_extra(self._widgets["w_extra"].value)
153158

154-
x_vals, ecdf, probs, mean, std, filled_columns = self._weights_to_ecdf()
159+
x_vals, epdf, mean, std, filled_columns = self._weights_to_pdf()
155160

156161
fitted_dist = None
157162
if filled_columns > 1:
158163
selected_distributions = get_distributions(self._widgets["w_distributions"].value)
159164

160165
if selected_distributions:
161166
self._reset_dist_panel(yticks=False)
162-
fitted_dist = fit_to_ecdf(
167+
fitted_dist = fit_to_epdf(
163168
selected_distributions,
164169
x_vals,
165-
ecdf,
170+
epdf,
166171
mean,
167172
std,
168173
self._x_min,
@@ -178,20 +183,27 @@ def _on_leave_fig(self):
178183
self._reset_dist_panel(yticks=True)
179184
self._fig.canvas.draw()
180185

181-
self.hist = (x_vals, probs)
186+
self.inputs = (
187+
x_vals,
188+
epdf,
189+
sum(self._grid._weights.values()),
190+
self._x_min,
191+
self._x_max,
192+
self._nrows,
193+
self._ncols,
194+
)
182195
self.dist = fitted_dist
183196

184-
def _weights_to_ecdf(self):
197+
def _weights_to_pdf(self):
185198
step = (self._x_max - self._x_min) / (self._ncols - 1)
186199
x_vals = [(k + 0.5) * step + self._x_min for k, v in self._grid._weights.items() if v != 0]
187200
total = sum(self._grid._weights.values())
188-
probabilities = [v / total for v in self._grid._weights.values() if v != 0]
189-
cum_sum = np.cumsum(probabilities)
201+
epdf = [v / total for v in self._grid._weights.values() if v != 0]
190202

191-
mean = sum(value * prob for value, prob in zip(x_vals, probabilities))
192-
std = (sum(prob * (value - mean) ** 2 for value, prob in zip(x_vals, probabilities))) ** 0.5
203+
mean = sum(prob * value for value, prob in zip(x_vals, epdf))
204+
std = (sum(prob * (value - mean) ** 2 for value, prob in zip(x_vals, epdf))) ** 0.5
193205

194-
return x_vals, cum_sum, probabilities, mean, std, len(x_vals)
206+
return x_vals, epdf, mean, std, len(x_vals)
195207

196208
def _update_grid(self):
197209
self._ax_grid.cla()

0 commit comments

Comments
 (0)