Skip to content

Commit

Permalink
ft: collapsed the MLL test full and light versions into one. A full s…
Browse files Browse the repository at this point in the history
…ampling from the entire stochastic set is possible now with the full_calculation flag. Removed mag_half_bin, which is now calculated directly from the region magnitude bins.
  • Loading branch information
pabloitu committed Oct 15, 2024
1 parent 65ffe27 commit c8a2964
Showing 1 changed file with 31 additions and 116 deletions.
147 changes: 31 additions & 116 deletions csep/core/catalog_evaluations.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,31 +357,31 @@ def calibration_test(evaluation_results, delta_1=False):

def resampled_magnitude_test(forecast: "CatalogForecast",
observed_catalog: CSEPCatalog,
mag_half_bin: float = 0.05,
verbose: bool = False,
seed: Optional[int] = None) -> CatalogMagnitudeTestResult:
"""
Performs the resampled magnitude test for catalog-based forecasts (Serafini et al., 2024),
which corrects the bias from the original M-test implementation to the total N of events.
Calculates the pseudo log-likelihood distribution from the synthetic catalog Λ_j as:
# todo: check correct formula
D_j = Σ_k [log(Λ_U(k) / N_U + 1) - log(Λ_j(k) / N_j + 1)]^2
where k are the magnitude bins, Λ_U the union of all synthetic catalogs, N_U the total
number of events in Λ_U, and N_j the number of events in Λ_j
The pseudo log-likelihood score from the observations is calculated as:
# todo: check correct formula
D_o = Σ_k [log(Λ_U(k) / N_U + 1) - log(Ω(k) / N_o + 1)]^2
where Ω is the observed catalog and N_o the total number of observed events.
Args:
forecast (CatalogForecast): The forecast to be evaluated
observed_catalog (CSEPCatalog): The observation/testing catalog.
mag_half_bin (float): half of the magnitude bin size
verbose (bool): Flag to display debug messages
seed (int): random number seed generator
seed (int): Random number generator seed
Returns:
A CatalogMagnitudeTestResult object containing the statistic distribution and the
Expand Down Expand Up @@ -422,6 +422,8 @@ def resampled_magnitude_test(forecast: "CatalogForecast",
union_histogram = numpy.zeros(len(forecast.magnitudes))
for j, cat in enumerate(forecast):
union_histogram += cat.magnitude_counts()

mag_half_bin = numpy.diff(observed_catalog.region.magnitudes)[0] / 2.
# end new
n_union_events = numpy.sum(union_histogram)
obs_histogram = observed_catalog.magnitude_counts()
Expand Down Expand Up @@ -484,20 +486,28 @@ def resampled_magnitude_test(forecast: "CatalogForecast",

def MLL_magnitude_test(forecast: "CatalogForecast",
observed_catalog: CSEPCatalog,
full_calculation: bool = False,
verbose: bool = False,
seed: Optional[int] = None) -> CatalogMagnitudeTestResult:
"""
Implements the modified Multinomial likelihood log-ratio (MLL) magnitude test (Serafini et
al., 2024).
Args:
forecast:
observed_catalog:
verbose:
seed:
forecast (CatalogForecast): The forecast to be evaluated
observed_catalog (CSEPCatalog): The observation/testing catalog.
full_calculation (bool): Whether to sample from the entire stochastic catalogs or from
its already processed magnitude histogram.
verbose (bool): Flag to display debug messages
seed (int): Random number generator seed
Returns:
A CatalogMagnitudeTestResult object containing the statistic distribution and the
observed statistic.
"""

# set seed
if seed:
numpy.random.seed(seed)

Expand Down Expand Up @@ -529,113 +539,15 @@ def MLL_magnitude_test(forecast: "CatalogForecast",

# calculate histograms of union forecast and total number of events
Lambda_u_histogram = numpy.zeros(len(forecast.magnitudes))
Lambda_u = []
for j, cat in enumerate(forecast):
Lambda_u = numpy.append(Lambda_u, cat.get_magnitudes())
Lambda_u_histogram += cat.magnitude_counts()
# # calculate histograms of observations and observed number of events
Omega_histogram = observed_catalog.magnitude_counts()
n_obs = numpy.sum(Omega_histogram)

# compute observed statistic
obs_d_statistic = MLL_score(Lambda_U_counts=Lambda_u_histogram,
Lambda_j_counts=Omega_histogram)

# compute the test statistic for each catalog
t0 = time.time()
for i, catalog in enumerate(forecast):
# this is new - sampled from the union forecast histogram
mag_values = numpy.random.choice(Lambda_u, size=int(n_obs))
extended_mag_max = max(forecast.magnitudes) + 10
Lambda_j_histogram, tmp = numpy.histogram(mag_values,
bins=numpy.append(forecast.magnitudes,
extended_mag_max))
# end new
n_events = numpy.sum(Lambda_j_histogram)
# if n_events == 0:
# print("Skipping to next because catalog contained zero events.")
# continue

# compute magnitude test statistic for the catalog
test_distribution.append(
MLL_score(Lambda_U_counts=Lambda_u_histogram,
Lambda_j_counts=Lambda_j_histogram)
)
# output status
if verbose:
tens_exp = numpy.floor(numpy.log10(i + 1))
if (i + 1) % 10 ** tens_exp == 0:
t1 = time.time()
print(f'Processed {i + 1} catalogs in {t1 - t0} seconds', flush=True)

# score evaluation
delta_1, delta_2 = get_quantiles(test_distribution, obs_d_statistic)

# prepare result
result = CatalogMagnitudeTestResult(test_distribution=test_distribution,
name='M-Test',
observed_statistic=obs_d_statistic,
quantile=(delta_1, delta_2),
status='normal',
min_mw=forecast.min_magnitude,
obs_catalog_repr=str(observed_catalog),
obs_name=observed_catalog.name,
sim_name=forecast.name)

return result


def MLL_magnitude_test_light(forecast: "CatalogForecast",
observed_catalog: CSEPCatalog,
mag_half_bin: float = 0.05,
verbose: bool = False,
seed: Optional[int] = None) -> CatalogMagnitudeTestResult:
"""
Args:
forecast:
observed_catalog:
mag_half_bin:
verbose:
seed:
Returns:
"""

# set seed
if seed:
numpy.random.seed(seed)
""" Performs magnitude test for catalog-based forecasts """
test_distribution = []

if forecast.region.magnitudes is None:
raise CSEPEvaluationException(
"Forecast must have region.magnitudes member to perform magnitude test.")

# short-circuit if zero events
if observed_catalog.event_count == 0:
print("Cannot perform magnitude test when observed event count is zero.")
# prepare result
result = CatalogMagnitudeTestResult(test_distribution=test_distribution,
name='M-Test',
observed_statistic=None,
quantile=(None, None),
status='not-valid',
min_mw=forecast.min_magnitude,
obs_catalog_repr=str(observed_catalog),
obs_name=observed_catalog.name,
sim_name=forecast.name)

return result

# compute expected rates for forecast if needed
if forecast.expected_rates is None:
forecast.get_expected_rates(verbose=verbose)
if full_calculation:
Lambda_u = []
else:
mag_half_bin = numpy.diff(observed_catalog.region.magnitudes)[0] / 2.

# calculate histograms of union forecast and total number of events
Lambda_u_histogram = numpy.zeros(len(forecast.magnitudes))
for j, cat in enumerate(forecast):
if full_calculation:
Lambda_u = numpy.append(Lambda_u, cat.get_magnitudes())
Lambda_u_histogram += cat.magnitude_counts()

# # calculate histograms of observations and observed number of events
Expand All @@ -644,16 +556,19 @@ def MLL_magnitude_test_light(forecast: "CatalogForecast",

# compute observed statistic
obs_d_statistic = MLL_score(Lambda_U_counts=Lambda_u_histogram,
Lambda_j_counts=Omega_histogram)
Lambda_j_counts=Omega_histogram)

probs = Lambda_u_histogram / numpy.sum(Lambda_u_histogram)

# compute the test statistic for each catalog
t0 = time.time()
for i, catalog in enumerate(forecast):
# this is new - sampled from the union forecast histogram
mag_values = numpy.random.choice(forecast.magnitudes + mag_half_bin, p=probs,
size=int(n_obs))
if full_calculation:
mag_values = numpy.random.choice(Lambda_u, size=int(n_obs))
else:
mag_values = numpy.random.choice(forecast.magnitudes + mag_half_bin, p=probs,
size=int(n_obs))
extended_mag_max = max(forecast.magnitudes) + 10
Lambda_j_histogram, tmp = numpy.histogram(mag_values,
bins=numpy.append(forecast.magnitudes,
Expand All @@ -667,7 +582,7 @@ def MLL_magnitude_test_light(forecast: "CatalogForecast",
# compute magnitude test statistic for the catalog
test_distribution.append(
MLL_score(Lambda_U_counts=Lambda_u_histogram,
Lambda_j_counts=Lambda_j_histogram)
Lambda_j_counts=Lambda_j_histogram)
)
# output status
if verbose:
Expand Down

1 comment on commit c8a2964

@Serra314
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Formulas in the resampled_magnitude_test are incorrect. They should be

D_j = Σ_k [log(Λ_U(k) *(N_o/N_U) + 1) - log(Λ_j(k) /(N_o/ N_j) + 1)]^2

and

D_o = Σ_k [log(Λ_U(k) *(N_o/N_U) + 1) - log(Ω(k) + 1)]^2

where N_o is the number of observations N_o = Σ_k Ω(k). Essentially is the log-ratio between rates for each magnitude magnitude bin, where the rates (both for the union and each synthetic catalogue) have been normalised to sum to the number of observations.

The MLL_magnitude_test function works correctly.

Please sign in to comment.