Skip to content

Commit 7a56862

Browse files
authored
Merge pull request #102 from icecube/add_pvalue_from_gamma_fit
add function to compute pval from gamma-fit to trials ...
2 parents 82d158a + ffc51b7 commit 7a56862

File tree

1 file changed

+119
-2
lines changed

1 file changed

+119
-2
lines changed

skyllh/core/analysis_utils.py

+119-2
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def pointlikesource_to_data_field_array(
7878

7979

8080
def calculate_pval_from_trials(
        ts_vals, ts_threshold, comp_operator='greater'):
    """Calculates the fraction (p-value) of test-statistic trials that are
    above (or at and above) the given test-statistic critical value.
    In addition it calculates the standard deviation of the p-value assuming
    binomial statistics, i.e. ``sqrt(p * (1 - p) / n_trials)``.

    Parameters
    ----------
    ts_vals : (n_trials,)-shaped 1D ndarray of float
        The ndarray holding the test-statistic values of the trials.
    ts_threshold : float
        The critical test-statistic value.
    comp_operator: string, optional
        The comparison operator for p-value calculation. It can be set to one
        of the following options: 'greater' or 'greater_equal'.

    Returns
    -------
    p, p_sigma: tuple(float, float)
        The p-value and its standard deviation.

    Raises
    ------
    ValueError
        If ``comp_operator`` is neither 'greater' nor 'greater_equal'.
    """
    if comp_operator == 'greater':
        selected = ts_vals > ts_threshold
    elif comp_operator == 'greater_equal':
        selected = ts_vals >= ts_threshold
    else:
        raise ValueError(
            f"The comp_operator={comp_operator} is not an "
            "available option ('greater' or 'greater_equal')."
        )

    # Counting the True entries of the mask avoids creating a filtered copy
    # of the trials array just to read its size.
    p = np.count_nonzero(selected) / ts_vals.size
    p_sigma = np.sqrt(p * (1 - p) / ts_vals.size)

    return (p, p_sigma)
98114

115+
116+
def calculate_pval_from_gammafit_to_trials(ts_vals, ts_threshold,
                                           eta=3.0, n_max=500000):
    """Calculates the probability (p-value) of the test-statistic exceeding
    the given test-statistic threshold. This calculation relies on fitting
    a gamma distribution, truncated from below at ``eta``, to the
    test-statistic values of the trials.

    Parameters
    ----------
    ts_vals : (n_trials,)-shaped 1D ndarray of float
        The ndarray holding the test-statistic values of the trials.
    ts_threshold : float
        The critical test-statistic value.
    eta : float, optional
        Test-statistic value at which the gamma function is truncated
        from below. Default = 3.0.
    n_max : int, optional
        The maximum number of trials that should be used during
        fitting. Only the first ``n_max`` trials are used.
        Default = 500,000

    Returns
    -------
    p, p_sigma: tuple(float, float)
        The p-value obtained from the fitted gamma distribution and its
        uncertainty. The uncertainty is currently always 0.0.

    Raises
    ------
    ValueError
        If ``ts_threshold`` is smaller than ``eta``.
    """
    if ts_threshold < eta:
        raise ValueError(
            f'ts threshold value = {ts_threshold:e}, eta = {eta:e}. '
            'The calculation of the p-value from the fit is correct only '
            'for ts threshold larger than the truncation threshold eta.')

    if len(ts_vals) > n_max:
        ts_vals = ts_vals[:n_max]

    n_total = len(ts_vals)
    ts_eta = ts_vals[ts_vals > eta]
    n_above_eta = len(ts_eta)
    # Fraction of the (possibly truncated) trial sample lying above eta.
    alpha = n_above_eta / n_total

    def obj(x):
        # Objective for the fit; x = (shape, scale) of the gamma distribution.
        return truncated_gamma_logpdf(
            x[0], x[1], eta=eta,
            ts_above_eta=ts_eta,
            N_above_eta=n_above_eta)

    x0 = [0.75, 1.8]  # Initial values of function parameters.
    bounds = [[0.1, 10], [0.1, 10]]  # Ranges for the minimization fitter.
    r = minimize(obj, x0, bounds=bounds)
    pars = r.x

    # Scale the fitted survival function such that it equals alpha at eta.
    norm = alpha / gamma.sf(eta, a=pars[0], scale=pars[1])
    p = norm * gamma.sf(ts_threshold, a=pars[0], scale=pars[1])

    # A correct calculation of the error in the p-value due to the
    # fitting uncertainty remains to be implemented.
    # Return p_sigma = 0 for now for consistency with
    # calculate_pval_from_trials().
    p_sigma = 0.0
    return (p, p_sigma)
171+
172+
173+
def calculate_pval_from_trials_mixed(ts_vals, ts_threshold, switch_at_ts=3.0,
        eta=None, n_max=500000, comp_operator='greater_equal'):
    """Calculates the probability (p-value) of the test-statistic exceeding
    the given test-statistic threshold, choosing the method based on the
    threshold: for ``ts_threshold`` below ``switch_at_ts`` the p-value is
    taken from the trials directly, otherwise it is obtained from a gamma
    distribution fitted to the trials.

    Parameters
    ----------
    ts_vals : (n_trials,)-shaped 1D ndarray of float
        The ndarray holding the test-statistic values of the trials.
    ts_threshold : float
        The critical test-statistic value.
    switch_at_ts : float, optional
        Test-statistic value below which the p-value is computed from the
        trials directly. For thresholds greater than or equal to
        switch_at_ts the p-value is calculated using a gamma fit.
    eta : float, optional
        Test-statistic value at which the gamma function is truncated
        from below. If None (default), ``switch_at_ts`` is used.
    n_max : int, optional
        The maximum number of trials that should be used during
        fitting. Default = 500,000
    comp_operator: string, optional
        The comparison operator for the p-value calculation from trials.
        It can be set to one of the following options: 'greater' or
        'greater_equal'.

    Returns
    -------
    p, p_sigma: tuple(float, float)
    """
    # Below the switching point the trials are used directly; eta plays no
    # role in that case.
    if ts_threshold < switch_at_ts:
        return calculate_pval_from_trials(
            ts_vals, ts_threshold, comp_operator=comp_operator)

    # By default the fit is truncated at the switching point itself.
    truncation_ts = switch_at_ts if eta is None else eta

    return calculate_pval_from_gammafit_to_trials(
        ts_vals, ts_threshold, eta=truncation_ts, n_max=n_max)
214+
215+
99216
def truncated_gamma_logpdf(
100217
a, scale, eta, ts_above_eta, N_above_eta):
101218
"""Calculates the -log(likelihood) of a sample of random numbers

0 commit comments

Comments
 (0)