diff --git a/docs/qp.rst b/docs/qp.rst
index 4eca637d..bcd6c803 100644
--- a/docs/qp.rst
+++ b/docs/qp.rst
@@ -13,7 +13,7 @@ these approximations.
 Ensemble and Factory
 ====================
 
-.. automodule:: ensemble
+.. automodule:: qp.ensemble
     :members:
     :undoc-members:
 
@@ -98,11 +98,21 @@ Gaussian mixture model based
 Quantification Metrics
 ======================
 
+.. automodule:: qp.metrics.metrics
+    :members:
+    :undoc-members:
 
-.. automodule:: qp.metrics
+.. automodule:: qp.metrics.array_metrics
     :members:
     :undoc-members:
 
+.. automodule:: qp.metrics.brier
+.. autoclass:: Brier
+    :members:
+
+.. automodule:: qp.metrics.pit
+.. autoclass:: PIT
+    :members:
 
 Utility functions
 =================
diff --git a/src/qp/metrics/__init__.py b/src/qp/metrics/__init__.py
index b29814ec..3ba3530c 100644
--- a/src/qp/metrics/__init__.py
+++ b/src/qp/metrics/__init__.py
@@ -1,3 +1,5 @@
 from .array_metrics import *
 from .metrics import *
-from .metrics import _calculate_grid_parameters # added for testing purposes
+
+# added for testing purposes
+from .metrics import _calculate_grid_parameters, _check_ensemble_is_not_nested, _check_ensembles_are_same_size
\ No newline at end of file
diff --git a/src/qp/metrics/array_metrics.py b/src/qp/metrics/array_metrics.py
index 76774f95..8083affd 100644
--- a/src/qp/metrics/array_metrics.py
+++ b/src/qp/metrics/array_metrics.py
@@ -1,34 +1,68 @@
 """This module implements metric calculations that are independent of qp.Ensembles"""
 import numpy as np
+from scipy import stats
 from scipy.integrate import quad
 from scipy.optimize import minimize_scalar
 
 from qp.utils import safelog
 
+def quick_anderson_darling(p_random_variables, scipy_distribution='norm'):
+    """Calculate the Anderson-Darling statistic using scipy.stats.anderson for one set of random
+    variables vs. a named scipy distribution.
+    For more details see: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson.html
 
-def quick_moment(p_eval, grid_to_N, dx):
+    Parameters
+    ----------
+    p_random_variables : np.array
+        An array of random variables from the given distribution
+    scipy_distribution : {'norm', 'expon', 'logistic', 'gumbel', 'gumbel_l', 'gumbel_r', 'extreme1'}, optional
+        The type of distribution to test against.
+
+    Returns
+    -------
+    Result object
+        An object with attributes ``statistic``, ``critical_values``, and ``significance_level``.
     """
-    Calculates a moment of an evaluated PDF
+    return stats.anderson(p_random_variables, dist=scipy_distribution)
+
+def quick_anderson_ksamp(p_random_variables, q_random_variables, **kwargs):
+    """Calculate the k-sample Anderson-Darling statistic using scipy.stats.anderson_ksamp for two
+    sets of random variables.
+    For more details see: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson_ksamp.html
 
     Parameters
     ----------
-    p_eval: numpy.ndarray, float
-        the values of a probability distribution
-    grid: numpy.ndarray, float
-        the grid upon which p_eval was evaluated
-    dx: float
-        the difference between regular grid points
-    N: int
-        order of the moment to be calculated
+    p_random_variables : np.array
+        An array of random variables from the given distribution
+    q_random_variables : np.array
+        An array of random variables from the given distribution
 
     Returns
     -------
-    M: float
-        value of the moment
+    Result object
+        An object with attributes ``statistic``, ``critical_values``, and ``significance_level``.
     """
-    M = np.dot(p_eval, grid_to_N) * dx
-    return M
+    return stats.anderson_ksamp([p_random_variables, q_random_variables], **kwargs)
+
+def quick_cramer_von_mises(p_random_variables, q_cdf, **kwargs):
+    """Calculate the Cramer von Mises statistic using scipy.stats.cramervonmises for one set of
+    random variables vs. a reference CDF. For more details see:
+    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.cramervonmises.html
+
+    Parameters
+    ----------
+    p_random_variables : np.array
+        An array of random variables from the given distribution
+    q_cdf : callable
+        A function to calculate the CDF of a given distribution
+
+    Returns
+    -------
+    Result object
+        An object with attributes ``statistic`` and ``pvalue``.
+    """
+    return stats.cramervonmises(p_random_variables, q_cdf, **kwargs)
 
 def quick_kld(p_eval, q_eval, dx=0.01):
     """
@@ -56,6 +90,51 @@ def quick_kld(p_eval, q_eval, dx=0.01):
     Dpq = dx * np.sum(p_eval * logquotient, axis=-1)
     return Dpq
 
+def quick_kolmogorov_smirnov(p_rvs, q_cdf, num_samples=100, **kwargs):
+    """Calculate the Kolmogorov-Smirnov statistic using scipy.stats.kstest for one distribution's
+    sampling function vs. a reference CDF. For more details see:
+    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kstest.html
+
+    Parameters
+    ----------
+    p_rvs : callable
+        A function to generate random variables for the given distribution
+    q_cdf : callable
+        A function to calculate the CDF of a given distribution
+    num_samples : int, optional
+        Number of samples to use in the calculation
+
+    Returns
+    -------
+    KstestResult
+        An object with attributes ``statistic`` and ``pvalue``.
+    """
+    return stats.kstest(p_rvs, q_cdf, N=num_samples, **kwargs)
+
+def quick_moment(p_eval, grid_to_N, dx):
+    """
+    Calculates a moment of an evaluated PDF
+
+    Parameters
+    ----------
+    p_eval: numpy.ndarray, float
+        the values of a probability distribution
+    grid_to_N: numpy.ndarray, float
+        the grid upon which p_eval was evaluated, raised to the power of the desired moment N
+    dx: float
+        the difference between regular grid points
+
+    Returns
+    -------
+    M: float
+        value of the moment
+    """
+    M = np.dot(p_eval, grid_to_N) * dx
+    return M
+
 def quick_rmse(p_eval, q_eval, N):
     """
     Calculates the Root Mean Square Error between two evaluations of PDFs.
@@ -102,17 +181,19 @@ def quick_rbpe(pdf_function, integration_bounds, limits=(np.inf, np.inf)):
     """
 
     def calculate_loss(x):
-        return 1. - (1. / (1. + (pow((x / .15), 2))))
+        return 1.0 - (1.0 / (1.0 + (pow((x / 0.15), 2))))
 
     lower = integration_bounds[0]
     upper = integration_bounds[1]
 
     def find_z_risk(zp):
         def integrand(z):
-            return pdf_function(z) * calculate_loss((zp - z) / (1. + z))
+            return pdf_function(z) * calculate_loss((zp - z) / (1.0 + z))
 
         return quad(integrand, lower, upper)[0]
 
     if limits[0] == np.inf:
         return minimize_scalar(find_z_risk).x
-    return minimize_scalar(find_z_risk, bounds=(limits[0], limits[1]), method='bounded').x
+    return minimize_scalar(
+        find_z_risk, bounds=(limits[0], limits[1]), method="bounded"
+    ).x
diff --git a/src/qp/metrics/brier.py b/src/qp/metrics/brier.py
new file mode 100644
index 00000000..f07f2f4e
--- /dev/null
+++ b/src/qp/metrics/brier.py
@@ -0,0 +1,97 @@
+import logging
+import numpy as np
+
+
+class Brier:
+    """Brier score based on https://en.wikipedia.org/wiki/Brier_score#Original_definition_by_Brier
+
+    Parameters
+    ----------
+    prediction: NxM array, float
+        Predicted probability for N celestial objects to have a redshift in
+        one of M bins. The sum of values along each row N should be 1.
+    truth: NxM array, int
+        True redshift values for N celestial objects, where the bin containing
+        the true redshift has a value of 1 and all other bins have a value of
+        0.
+    """
+
+    def __init__(self, prediction, truth):
+        """Constructor"""
+
+        self._prediction = prediction
+        self._truth = truth
+        self._axis_for_summation = None  # axis to sum for metric calculation
+
+    def evaluate(self):
+        """Evaluate the Brier score.
+
+        Returns
+        -------
+        float
+            The result of calculating the Brier metric, a value in the interval [0,2]
+        """
+
+        self._manipulate_data()
+        self._validate_data()
+        return self._calculate_metric()
+
+    def _manipulate_data(self):
+        """
+        Placeholder for data manipulation as required, e.g. converting from
+        qp.Ensemble objects into np.array objects.
+        """
+
+        # Attempt to convert the input variables into np.arrays
+        self._prediction = np.array(self._prediction)
+        self._truth = np.array(self._truth)
+
+    def _validate_data(self):
+        """
+        Strictly for data validation - no calculations or data structure
+        changes.
+
+        Raises
+        ------
+        TypeError
+            If either the prediction or truth input could not be converted
+            into a numeric Numpy array.
+        ValueError
+            If the prediction and truth arrays do not have the same
+            numpy.shape.
+
+        Warning
+        -------
+        Logs a warning message if the input predictions do not each sum to 1.
+        """
+
+        # Raise TypeError exceptions if the inputs were not translated to
+        # numeric np.arrays
+        if not np.issubdtype(self._prediction.dtype, np.number):
+            raise TypeError(
+                "Input prediction array could not be converted to a Numpy array"
+            )
+        if not np.issubdtype(self._truth.dtype, np.number):
+            raise TypeError("Input truth array could not be converted to a Numpy array")
+
+        # Raise ValueError if the arrays have different shapes
+        if self._prediction.shape != self._truth.shape:
+            raise ValueError(
+                "Input prediction and truth arrays do not have the same shape"
+            )
+
+        # Log a warning if the N rows of the input prediction do not each sum to
+        # 1. Note: For 1d arrays, a sum along axis=1 will fail, so we set
+        # self._axis_for_summation appropriately for that case
+        self._axis_for_summation = 0 if self._prediction.ndim == 1 else 1
+        if not np.allclose(
+            np.sum(self._prediction, axis=self._axis_for_summation), 1.0
+        ):
+            logging.warning("Input predictions do not sum to 1.")
+
+    def _calculate_metric(self):
+        """
+        Calculate the Brier metric for the input data.
+        """
+        return np.mean(
+            np.sum((self._prediction - self._truth) ** 2, axis=self._axis_for_summation)
+        )
diff --git a/src/qp/metrics/metrics.py b/src/qp/metrics/metrics.py
index 74696951..c9f15fd1 100644
--- a/src/qp/metrics/metrics.py
+++ b/src/qp/metrics/metrics.py
@@ -1,14 +1,15 @@
 """This module implements some performance metrics for distribution parameterization"""
-
+import logging
 from collections import namedtuple
 from functools import partial
 
 import numpy as np
 
 import qp.metrics.array_metrics as array_metrics
+from qp.metrics.brier import Brier
 from qp.utils import epsilon
 
-Grid = namedtuple('Grid', ['grid_values', 'cardinality', 'resolution', 'limits'])
+Grid = namedtuple('Grid', ['grid_values', 'cardinality', 'resolution', 'hist_bin_edges', 'limits'])
 
 def _calculate_grid_parameters(limits, dx:float=0.01) -> Grid:
     """
@@ -25,13 +26,18 @@ def _calculate_grid_parameters(limits, dx:float=0.01) -> Grid:
         grid_values: np.array with size = cardinality
         cardinality: int, number of elements in grid_value
         resolution: float, equal to grid_values[i] - grid_values[i-1]
+        hist_bin_edges: np.array with size = cardinality+1.
+            Equally spaced histogram bin edges starting at limits[0]-resolution/2.
+            Assumes that grid_values[i] should be centered in the bin defined by
+            (hist_bin_edges[i], hist_bin_edges[i+1]).
         limits: 2-tuple, the limits passed in and used in this function
     """
     cardinality = int((limits[-1] - limits[0]) / dx)
     grid_values = np.linspace(limits[0], limits[1], cardinality)
     resolution = (limits[-1] - limits[0]) / (cardinality - 1)
+    hist_bin_edges = np.histogram_bin_edges((limits[0]-resolution/2, limits[1]+resolution/2), cardinality)
 
-    return Grid(grid_values, cardinality, resolution, limits)
+    return Grid(grid_values, cardinality, resolution, hist_bin_edges, limits)
 
 def calculate_moment(p, N, limits, dx=0.01):
     """
@@ -161,7 +167,7 @@ def calculate_rbpe(p, limits=(np.inf, np.inf)):
     ----------
     p: qp.Ensemble object
         Ensemble of PDFs to be evalutated
-    limits, tuple of floats
+    limits: tuple of floats
         The limits at which to evaluate possible z_best estimates.
         If custom limits are not provided then all potential z value will be
         considered using the scipy.optimize.minimize_scalar function.
@@ -187,3 +193,243 @@ def evaluate_pdf_at_z(z, dist):
         rbpes.append(array_metrics.quick_rbpe(this_dist_pdf_at_z, integration_bounds, limits))
 
     return np.array(rbpes)
+
+def calculate_brier(p, truth, limits, dx=0.01):
+    """This function will do the following:
+
+    1) Generate an Mx1 sized grid based on `limits` and `dx`.
+    2) Produce an NxM array by evaluating the pdf for each of the N distribution objects in the Ensemble p on the grid.
+    3) Produce an NxM truth_array using the input truth and the generated grid. All values will be 0 or 1.
+    4) Create a Brier metric evaluation object.
+    5) Return the result of the Brier metric calculation.
+
+    Parameters
+    ----------
+    p: qp.Ensemble object
+        an Ensemble of N probability distribution functions that will be gridded and compared against truth.
+    truth: Nx1 sequence
+        the list of true values, 1 per distribution in p.
+    limits: 2-tuple of floats
+        endpoints of the grid on which to evaluate the PDFs of the distributions in p
+    dx: float
+        resolution of the grid. Defaults to 0.01.
+
+    Returns
+    -------
+    Brier_metric: float
+    """
+
+    # Ensure that the number of distributions in the Ensemble is equal to the length of the truth array
+    if p.npdf != len(truth):
+        raise ValueError("Number of distributions in the Ensemble (%d) is not equal to the number of truth values (%d)" % (p.npdf, len(truth)))
+
+    # Values of truth that are outside the defined limits will not appear in the truth_array.
+    # Consider expanding the limits or using numpy.clip to restrict truth values to the limits.
+    if np.any(np.less(truth, limits[0])) or np.any(np.greater(truth, limits[1])):
+        raise ValueError("Input truth values exceed the defined limits")
+
+    # Make a grid object that defines grid values and histogram bin edges using limits and dx
+    grid = _calculate_grid_parameters(limits, dx)
+
+    # Evaluate the pdf of the distributions on the grid.
+    # The value returned from p.gridded is a 2-tuple. The 0th index is the array of grid points,
+    # the 1st index is the array of PDF values. Thus we call p.gridded(...)[1]
+    pdf_values = p.gridded(grid.grid_values)[1]
+
+    # Create the NxM truth_array.
+    # Note np.histogram returns a 2-tuple; taking index [0] keeps only the histogram array,
+    # and np.squeeze removes the extra dimension left by the list comprehension.
+    truth_array = np.squeeze([np.histogram(t, grid.hist_bin_edges)[0] for t in truth])
+
+    # instantiate the Brier metric object
+    brier_metric_evaluation = Brier(pdf_values, truth_array)
+
+    # return the results of evaluating the Brier metric
+    return brier_metric_evaluation.evaluate()
+
+def calculate_anderson_darling(p, scipy_distribution='norm', num_samples=100, _random_state=None):
+    """Calculate the Anderson-Darling statistic using scipy.stats.anderson for each distribution
+    in an Ensemble. For more details see:
+    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson.html
+
+    Parameters
+    ----------
+    p : qp.Ensemble
+        An Ensemble of distributions to be tested
+    scipy_distribution : {'norm', 'expon', 'logistic', 'gumbel', 'gumbel_l', 'gumbel_r', 'extreme1'}, optional
+        The type of distribution to test against.
+    num_samples : int, optional
+        Number of random variable samples to generate for each distribution in the calculation
+    _random_state : int, optional
+        For testing purposes only, this is used to specify a reproducible set of random variables.
+
+    Returns
+    -------
+    [Result objects]
+        An array of objects with attributes ``statistic``, ``critical_values``, and ``significance_level``.
+    """
+
+    try:
+        _check_ensemble_is_not_nested(p)
+    except ValueError: #pragma: no cover - unittest coverage for _check_ensemble_is_not_nested is complete
+        logging.warning("Each element in the ensemble `p` must be a single distribution.")
+
+    # Pass an array of random variables and the name of a scipy distribution to the quick Anderson-Darling function
+    output = [
+        array_metrics.quick_anderson_darling(
+            np.squeeze(p_dist.rvs(size=num_samples, random_state=_random_state)),
+            scipy_distribution=scipy_distribution
+        )
+        for p_dist in p
+    ]
+
+    return output
+
+def calculate_cramer_von_mises(p, q, num_samples=100, _random_state=None, **kwargs):
+    """Calculate the Cramer von Mises statistic using scipy.stats.cramervonmises for each pair of distributions
+    in two input Ensembles. For more details see:
+    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.cramervonmises.html
+
+    Parameters
+    ----------
+    p : qp.Ensemble
+        An Ensemble of distributions to be tested
+    q : qp.Ensemble
+        A second Ensemble of distributions, each with a defined ``cdf`` method, to be tested against
+    num_samples : int, optional
+        Number of random variable samples to generate for the calculation
+    _random_state : int, optional
+        For testing purposes only, this is used to specify a reproducible set of random variables.
+
+    Returns
+    -------
+    [Result objects]
+        An array of objects with attributes ``statistic`` and ``pvalue``.
+    """
+
+    try:
+        _check_ensembles_are_same_size(p, q)
+    except ValueError: #pragma: no cover - unittest coverage for _check_ensembles_are_same_size is complete
+        logging.warning("Input ensembles should have the same number of distributions")
+
+    try:
+        _check_ensemble_is_not_nested(p)
+    except ValueError: #pragma: no cover - unittest coverage for _check_ensemble_is_not_nested is complete
+        logging.warning("Each element in the ensemble `p` must be a single distribution.")
+
+    try:
+        _check_ensemble_is_not_nested(q)
+    except ValueError: #pragma: no cover - unittest coverage for _check_ensemble_is_not_nested is complete
+        logging.warning("Each element in the ensemble `q` must be a single distribution.")
+
+    # Pass an array of random variables and a CDF callable for each pair of distributions to the quick CvM statistic function
+    output = [
+        array_metrics.quick_cramer_von_mises(
+            np.squeeze(p_dist.rvs(size=num_samples, random_state=_random_state)),
+            q_dist.cdf,
+            **kwargs
+        )
+        for p_dist, q_dist in zip(p, q)
+    ]
+
+    return output
+
+def calculate_kolmogorov_smirnov(p, q, num_samples=100, **kwargs):
+    """Calculate the Kolmogorov-Smirnov statistic using scipy.stats.kstest for each pair of distributions
+    in two input Ensembles. For more details see:
+    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kstest.html
+
+    Parameters
+    ----------
+    p : qp.Ensemble
+        An Ensemble of distributions to be tested
+    q : qp.Ensemble
+        A second Ensemble of distributions to be tested
+    num_samples : int, optional
+        Number of samples to use in the calculation
+
+    Returns
+    -------
+    [KstestResult]
+        An array of KstestResult objects with attributes ``statistic`` and ``pvalue``.
+    """
+
+    try:
+        _check_ensembles_are_same_size(p, q)
+    except ValueError: #pragma: no cover - unittest coverage for _check_ensembles_are_same_size is complete
+        logging.warning("Input ensembles should have the same number of distributions")
+
+    try:
+        _check_ensemble_is_not_nested(p)
+    except ValueError: #pragma: no cover - unittest coverage for _check_ensemble_is_not_nested is complete
+        logging.warning("Each element in the ensemble `p` must be a single distribution.")
+
+    try:
+        _check_ensemble_is_not_nested(q)
+    except ValueError: #pragma: no cover - unittest coverage for _check_ensemble_is_not_nested is complete
+        logging.warning("Each element in the ensemble `q` must be a single distribution.")
+
+    # Pass the rvs and cdf functions for each pair of distributions to the quick KS statistic function
+    output = [
+        array_metrics.quick_kolmogorov_smirnov(
+            p_dist.rvs,
+            q_dist.cdf,
+            num_samples=num_samples,
+            **kwargs
+        )
+        for p_dist, q_dist in zip(p, q)
+    ]
+
+    return output
+
+def calculate_outlier_rate(p, lower_limit=0.0001, upper_limit=0.9999):
+    """Fraction of outliers in each distribution
+
+    Parameters
+    ----------
+    p : qp.Ensemble
+        A collection of N distributions. This implementation expects that Ensembles are not nested.
+    lower_limit : float, optional
+        Lower bound for outliers, by default 0.0001
+    upper_limit : float, optional
+        Upper bound for outliers, by default 0.9999
+
+    Returns
+    -------
+    [float]
+        1xN array where each element is the fraction of outliers for a distribution in the Ensemble.
+    """
+
+    # Validate that all the distributions in the Ensemble are single distributions - i.e. no nested Ensembles
+    try:
+        _check_ensemble_is_not_nested(p)
+    except ValueError: #pragma: no cover - unittest coverage for _check_ensemble_is_not_nested is complete
+        logging.warning("Each element in the ensemble `p` must be a single distribution.")
+
+    outlier_rates = [(dist.cdf(lower_limit) + (1. - dist.cdf(upper_limit)))[0][0] for dist in p]
+    return outlier_rates
+
+def _check_ensembles_are_same_size(p, q):
+    """This utility function checks that two Ensembles contain an equal number of distribution objects.
+
+    Args:
+        p (qp.Ensemble): An Ensemble containing 0 or more distributions
+        q (qp.Ensemble): A second Ensemble containing 0 or more distributions
+
+    Raises:
+        ValueError: If the result of evaluating qp.Ensemble.npdf on each Ensemble is not the same, raise an error.
+    """
+    if p.npdf != q.npdf:
+        raise ValueError("Input ensembles should have the same number of distributions")
+
+def _check_ensemble_is_not_nested(p):
+    """This utility function ensures that each element in the Ensemble is a single distribution.
+
+    Args:
+        p (qp.Ensemble): An Ensemble that could contain nested Ensembles with multiple distributions in each
+
+    Raises:
+        ValueError: If any element of the input Ensemble contains more than 1 PDF, raise an error.
+    """
+    for dist in p:
+        if dist.npdf != 1:
+            raise ValueError("Each element in the input Ensemble should be a single distribution.")
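To make the grid/bin-edge contract above concrete, here is a small worked example (dx is chosen to be exactly representable in binary so the cardinality arithmetic is unambiguous; the values are illustrative):

```python
import numpy as np
from qp.metrics.metrics import _calculate_grid_parameters

grid = _calculate_grid_parameters((0.0, 1.0), dx=0.25)

print(grid.cardinality)          # 4
print(grid.grid_values)          # [0. 0.33333333 0.66666667 1.] -- linspace includes both limits
print(grid.resolution)           # 0.3333... == grid_values[1] - grid_values[0] (note: not dx itself)
print(grid.hist_bin_edges)       # 5 equally spaced edges from -resolution/2 to 1 + resolution/2
print(grid.hist_bin_edges.size)  # cardinality + 1; grid_values[i] sits at the center of bin i
```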
diff --git a/src/qp/metrics/pit.py b/src/qp/metrics/pit.py
new file mode 100644
index 00000000..a3fc52cc
--- /dev/null
+++ b/src/qp/metrics/pit.py
@@ -0,0 +1,194 @@
+import logging
+import numpy as np
+from scipy import stats
+import qp
+from qp.metrics.metrics import calculate_outlier_rate
+from qp.metrics.array_metrics import quick_anderson_ksamp, quick_cramer_von_mises, quick_kolmogorov_smirnov
+
+DEFAULT_QUANTS = np.linspace(0, 1, 100)
+
+class PIT():
+    """PIT(qp_ens, true_vals, eval_grid=DEFAULT_QUANTS)
+
+    Probability Integral Transform
+
+    Parameters
+    ----------
+    qp_ens : Ensemble
+        A collection of N distribution objects
+    true_vals : [float]
+        An array-like sequence of N float values representing the known true value for each distribution
+    eval_grid : [float], optional
+        A strictly increasing array-like sequence in the range [0,1], by default DEFAULT_QUANTS
+
+    Returns
+    -------
+    PIT object
+        An object with an Ensemble containing the PIT distribution, and a full set of PIT samples.
+    """
+
+    def __init__(self, qp_ens, true_vals, eval_grid=DEFAULT_QUANTS):
+        """We will create a quantile Ensemble to store the PIT distribution, but also store the
+        full set of PIT samples as ancillary data of the (single PDF) Ensemble.
+
+        Parameters
+        ----------
+        qp_ens : Ensemble
+            A collection of N distribution objects
+        true_vals : [float]
+            An array-like sequence of N float values representing the known true value for each distribution
+        eval_grid : [float], optional
+            A strictly increasing array-like sequence in the range [0,1], by default DEFAULT_QUANTS
+        """
+
+        self._true_vals = true_vals
+
+        # For each distribution in the Ensemble, calculate the CDF where x = known_true_value
+        self._pit_samps = np.array([qp_ens[i].cdf(self._true_vals[i])[0][0] for i in range(len(self._true_vals))])
+
+        n_pit = np.min([len(self._pit_samps), len(eval_grid)])
+        if n_pit < len(eval_grid):
+            logging.warning('Number of pit samples is smaller than the evaluation grid size. Will create a new evaluation grid with size = number of pit samples')
+            eval_grid = np.linspace(0, 1, n_pit)
+
+        data_quants = np.quantile(self._pit_samps, eval_grid)
+        self._pit = qp.Ensemble(qp.quant_piecewise, data=dict(quants=eval_grid, locs=np.atleast_2d(data_quants)))
+
+    @property
+    def pit_samps(self):
+        """Returns the PIT samples, i.e. ``CDF(true_vals)`` for each distribution in the Ensemble used to initialize the PIT object.
+
+        Returns
+        -------
+        np.array
+            An array of floats
+        """
+        return self._pit_samps
+
+    @property
+    def pit(self):
+        """Return the PIT Ensemble object
+
+        Returns
+        -------
+        qp.Ensemble
+            An Ensemble containing 1 qp.quant_piecewise distribution.
+        """
+        return self._pit
+
+    def calculate_pit_meta_metrics(self):
+        """Convenience method that will calculate all of the PIT meta metrics and return them
+        as a dictionary.
+
+        Returns
+        -------
+        dictionary
+            The collection of PIT statistics
+        """
+        pit_meta_metrics = {}
+
+        pit_meta_metrics['ad'] = self.evaluate_PIT_anderson_ksamp()
+        pit_meta_metrics['cvm'] = self.evaluate_PIT_CvM()
+        pit_meta_metrics['ks'] = self.evaluate_PIT_KS()
+        pit_meta_metrics['outlier_rate'] = self.evaluate_PIT_outlier_rate()
+
+        return pit_meta_metrics
+
+    def evaluate_PIT_anderson_ksamp(self, pit_min=0., pit_max=1.):
+        """Use scipy.stats.anderson_ksamp to compute the Anderson-Darling statistic
+        for the cdf(truth) values by comparing with a uniform distribution between 0 and 1.
+        Up to the current version (1.9.3), scipy.stats.anderson does not support
+        uniform distributions as a reference for the 1-sample test, therefore we create a uniform
+        "distribution" and pass it as the second value in the list of parameters to the scipy
+        implementation of the k-sample Anderson-Darling test.
+        For details see: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson_ksamp.html
+
+        Parameters
+        ----------
+        pit_min : float, optional
+            Minimum PIT value to accept, by default 0.
+        pit_max : float, optional
+            Maximum PIT value to accept, by default 1.
+
+        Returns
+        -------
+        Result object
+            An object with attributes ``statistic``, ``critical_values``, and ``significance_level``.
+            For details see: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson_ksamp.html
+        """
+        # Remove the CDF values that are outside the min/max range
+        trimmed_pit_values = self._trim_pit_values(pit_min, pit_max)
+
+        uniform_yvals = np.linspace(pit_min, pit_max, len(trimmed_pit_values))
+
+        return quick_anderson_ksamp(trimmed_pit_values, uniform_yvals)
+
+    def evaluate_PIT_CvM(self):
+        """Calculate the Cramer von Mises statistic by comparing self._pit_samps to a uniform
+        distribution using scipy.stats.cramervonmises. For more details see:
+        https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.cramervonmises.html
+
+        Returns
+        -------
+        Result object
+            An object with attributes ``statistic`` and ``pvalue``.
+            For details see: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.cramervonmises.html
+        """
+        return quick_cramer_von_mises(self._pit_samps, stats.uniform.cdf)
+
+    def evaluate_PIT_KS(self):
+        """Calculate the Kolmogorov-Smirnov statistic using scipy.stats.kstest. For more details see:
+        https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kstest.html
+
+        Returns
+        -------
+        KstestResult
+            An object with attributes ``statistic`` and ``pvalue``.
+            For details see: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kstest.html
+        """
+        return quick_kolmogorov_smirnov(self._pit_samps, stats.uniform.cdf)
+
+    def evaluate_PIT_outlier_rate(self, pit_min=0.0001, pit_max=0.9999):
+        """Compute the fraction of PIT outliers by evaluating the CDF of the distribution in the PIT Ensemble
+        at `pit_min` and `pit_max`.
+
+        Parameters
+        ----------
+        pit_min : float, optional
+            Lower bound for outliers, by default 0.0001
+        pit_max : float, optional
+            Upper bound for outliers, by default 0.9999
+
+        Returns
+        -------
+        float
+            The fraction of outliers in this distribution given the min and max bounds.
+        """
+        return calculate_outlier_rate(self._pit, pit_min, pit_max)
+
+    def _trim_pit_values(self, cdf_min, cdf_max):
+        """Remove and report any cdf(x) that are outside the min/max range.
+
+        Parameters
+        ----------
+        cdf_min : float
+            The minimum cdf(x) value to accept
+        cdf_max : float
+            The maximum cdf(x) value to accept
+
+        Returns
+        -------
+        [float]
+            The list of PIT values within the min/max range.
+        """
+        # Create a boolean mask for PIT values between cdf_min and cdf_max
+        mask = (self._pit_samps >= cdf_min) & (self._pit_samps <= cdf_max)
+
+        # Keep PIT values that are within the min/max range
+        pits_clean = self._pit_samps[mask]
+
+        # Determine how many PIT values were dropped and warn the user.
+        diff = len(self._pit_samps) - len(pits_clean)
+        if diff > 0:
+            logging.warning("Removed %d PITs from the sample.", diff)
+
+        return pits_clean
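For orientation, a minimal end-to-end sketch of the class above, mirroring the unit-test setup that appears later in this diff (the statistic values depend on the random draw):

```python
import numpy as np
import qp
from qp.metrics.pit import PIT

rng = np.random.default_rng(87)
true_zs = rng.uniform(high=2.5, size=100)

# One Gaussian per object, centered near its true redshift
locs = np.expand_dims(true_zs + rng.normal(0.0, 0.01, 100), -1)
scales = np.full((100, 1), 0.1)
ens = qp.Ensemble(qp.stats.norm, data=dict(loc=locs, scale=scales))

pit = PIT(ens, true_zs, eval_grid=np.linspace(0, 1, 51))
meta = pit.calculate_pit_meta_metrics()
print(meta['ks'].statistic, meta['outlier_rate'])
```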
+ """ + pred = [[1,0,0], [0,1,0], [0,0.5,0.5]] + truth = [[1,0,0], [0,1,0], [0,0,1]] + brier_obj = Brier(pred, truth) + result = brier_obj.evaluate() + assert np.isscalar(result) + + def test_brier_base_with_non_integers(self): + """ + Verify output for non-integer prediction values. + """ + pred = [[0.5,0.5,0]] + truth = [[1,0,0]] + brier_obj = Brier(pred, truth) + result = brier_obj.evaluate() + expected = 0.5 + assert np.isclose(result, expected) + + def test_brier_max_result(self): + """ + Base case where prediction is completely wrong, should produce maximum + possible result value, 2. + """ + pred = [[0,1,0], [1,0,0]] + truth = [[1,0,0], [0,1,0]] + brier_obj = Brier(pred, truth) + result = brier_obj.evaluate() + expected = 2. + assert np.isclose(result, expected) + + def test_brier_min_result(self): + """ + Base case where prediction is perfect, should produce minimum possible + result value, 0. + """ + pred = [[1,0,0], [0,1,0]] + truth = [[1,0,0], [0,1,0]] + brier_obj = Brier(pred, truth) + result = brier_obj.evaluate() + expected = 0. + assert np.isclose(result, expected) + + def test_brier_input_arrays_different_sizes(self): + """ + Verify exception is raised when input arrays are different sizes. + """ + pred = [[1,0,0], [0,1,0]] + truth = [[1,0,0], [0,1,0], [0,0,0]] + brier_obj = Brier(pred, truth) + with self.assertRaises(ValueError): + _ = brier_obj.evaluate() + + def test_brier_with_garbage_prediction_input(self): + """ + Verify exception is raised when prediction input is non-numeric. + """ + pred = ["foo", "bar"] + truth = [[1,0,0],[0,1,0]] + brier_obj = Brier(pred, truth) + with self.assertRaises(TypeError): + _ = brier_obj.evaluate() + + def test_brier_with_garbage_truth_input(self): + """ + Verify exception is raised when truth input is non-numeric. + """ + pred = [[1,0,0], [0,1,0]] + truth = ["hello sky", "goodbye ground"] + brier_obj = Brier(pred, truth) + with self.assertRaises(TypeError): + _ = brier_obj.evaluate() + + def test_brier_prediction_does_not_sum_to_one(self): + """ + Verify exception is raised when prediction input rows don't sum to 1 This + also verifies that while the total sum of values in the prediction array sum + to 2, the individual rows do not, and thus logs a warning + """ + pred = [[1,0.0001,0], [0,0.9999,0]] + truth = [[1,0,0], [0,1,0]] + LOGGER.info('Testing now...') + brier_obj = Brier(pred, truth) + with self.assertLogs() as captured: + _ = brier_obj.evaluate() + self.assertEqual(captured.records[0].getMessage(), "Input predictions do not sum to 1.") + + def test_brier_1d_prediction_does_not_sum_to_one(self): + """ + Verify exception is raised when 1d prediction input rows don't sum to 1 + """ + pred = [0.3,0.8,0] + truth = [1,0,0] + LOGGER.info('Testing now...') + brier_obj = Brier(pred, truth) + with self.assertLogs(level=logging.WARNING) as captured: + # with caplog.at_level(logging.WARNING): + _ = brier_obj.evaluate() + self.assertEqual(captured.records[0].getMessage(), "Input predictions do not sum to 1.") + + def test_brier_1d(self): + """ + Verify 1 dimensional input produced the correct output. This exercises the + condition in brier._calculate_metric that changes the axis upon which the + np.mean operates. + """ + pred = [1,0,0] + truth = [1,0,0] + brier_obj = Brier(pred, truth) + result = brier_obj.evaluate() + expected = 0. 
+ assert np.isclose(result, expected) diff --git a/tests/qp/test_metrics.py b/tests/qp/test_metrics.py index ccd77e4f..f4fe1b39 100644 --- a/tests/qp/test_metrics.py +++ b/tests/qp/test_metrics.py @@ -8,6 +8,7 @@ import numpy as np from qp import test_funcs +from qp.metrics.metrics import * from qp.utils import epsilon @@ -33,6 +34,7 @@ def tearDown(self): """ Clean up any mock data files created by the tests. """ def test_calculate_grid_parameters(self): + """ Given a small, simple input, ensure that the grid parameters are correct. """ limits = (0,1) dx = 1./11 grid_params = qp.metrics._calculate_grid_parameters(limits, dx) #pylint: disable=W0212 @@ -41,12 +43,35 @@ def test_calculate_grid_parameters(self): assert grid_params.grid_values[0] == limits[0] assert grid_params.grid_values[-1] == limits[-1] assert grid_params.grid_values.size == grid_params.cardinality + assert grid_params.hist_bin_edges[0] == limits[0] - grid_params.resolution/2 + assert grid_params.hist_bin_edges[-1] == limits[-1] + grid_params.resolution/2 + assert grid_params.hist_bin_edges.size == grid_params.cardinality + 1 + + def test_calculate_grid_parameters_larger_range(self): + """ Test that a large range in limits and small delta returns expected results """ + limits = (-75,112) + dx = 0.042 + grid_params = qp.metrics._calculate_grid_parameters(limits, dx) #pylint: disable=W0212 + assert grid_params.grid_values[0] == limits[0] + assert grid_params.grid_values[-1] == limits[-1] + assert grid_params.grid_values.size == grid_params.cardinality + assert grid_params.hist_bin_edges[0] == limits[0] - grid_params.resolution/2 + assert grid_params.hist_bin_edges[-1] == limits[-1] + grid_params.resolution/2 + assert grid_params.hist_bin_edges.size == grid_params.cardinality + 1 def test_kld(self): """ Test the calculate_kld method """ kld = qp.metrics.calculate_kld(self.ens_n, self.ens_n_shift, limits=(0.,2.5)) assert np.all(kld == 0.) + def test_calculate_moment(self): + """ Base case test """ + moment = 1 + limits = (-2., 2.) 
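The two 1d tests above exist because `np.sum(..., axis=1)` is invalid for a 1d array; a two-line illustration of the axis switch in `Brier._calculate_metric`:

```python
import numpy as np

pred = np.array([1.0, 0.0, 0.0])  # ndim == 1, so Brier sums over axis 0
print(np.sum(pred, axis=0))       # 1.0
# np.sum(pred, axis=1)            # would raise AxisError: axis 1 is out of bounds
```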
diff --git a/tests/qp/test_metrics.py b/tests/qp/test_metrics.py
index ccd77e4f..f4fe1b39 100644
--- a/tests/qp/test_metrics.py
+++ b/tests/qp/test_metrics.py
@@ -8,6 +8,7 @@
 import numpy as np
 
 from qp import test_funcs
+from qp.metrics.metrics import *
 from qp.utils import epsilon
 
 
@@ -33,6 +34,7 @@ def tearDown(self):
         """ Clean up any mock data files created by the tests. """
 
     def test_calculate_grid_parameters(self):
+        """ Given a small, simple input, ensure that the grid parameters are correct. """
         limits = (0,1)
         dx = 1./11
         grid_params = qp.metrics._calculate_grid_parameters(limits, dx) #pylint: disable=W0212
@@ -41,12 +43,35 @@ def test_calculate_grid_parameters(self):
         assert grid_params.grid_values[0] == limits[0]
         assert grid_params.grid_values[-1] == limits[-1]
         assert grid_params.grid_values.size == grid_params.cardinality
+        assert grid_params.hist_bin_edges[0] == limits[0] - grid_params.resolution/2
+        assert grid_params.hist_bin_edges[-1] == limits[-1] + grid_params.resolution/2
+        assert grid_params.hist_bin_edges.size == grid_params.cardinality + 1
+
+    def test_calculate_grid_parameters_larger_range(self):
+        """ Test that a large range in limits and a small delta return expected results """
+        limits = (-75,112)
+        dx = 0.042
+        grid_params = qp.metrics._calculate_grid_parameters(limits, dx) #pylint: disable=W0212
+        assert grid_params.grid_values[0] == limits[0]
+        assert grid_params.grid_values[-1] == limits[-1]
+        assert grid_params.grid_values.size == grid_params.cardinality
+        assert grid_params.hist_bin_edges[0] == limits[0] - grid_params.resolution/2
+        assert grid_params.hist_bin_edges[-1] == limits[-1] + grid_params.resolution/2
+        assert grid_params.hist_bin_edges.size == grid_params.cardinality + 1
 
     def test_kld(self):
         """ Test the calculate_kld method """
         kld = qp.metrics.calculate_kld(self.ens_n, self.ens_n_shift, limits=(0.,2.5))
         assert np.all(kld == 0.)
 
+    def test_calculate_moment(self):
+        """ Base case test """
+        moment = 1
+        limits = (-2., 2.)
+        result = calculate_moment(self.ens_n, moment, limits)
+
+        self.assertTrue(result is not None)
+
     def test_kld_alternative_ensembles(self):
         """ Test the calculate_kld method against different types of ensembles """
         bins = np.linspace(-5, 5, 11)
@@ -199,6 +224,95 @@ def test_rbpe_multiple_pdfs(self):
         error_msg = 'quick_rbpe only handles Ensembles with a single PDF'
         self.assertTrue(error_msg in str(context.exception))
 
+    def test_calculate_brier(self):
+        """ Base test case of the Ensemble-based Brier metric """
+        truth = 2 * (np.random.uniform(size=(11,1)) - 0.5)
+        limits = [-2., 2.]
+        result = calculate_brier(self.ens_n, truth, limits)
+        self.assertTrue(result is not None)
+
+    def test_calculate_brier_mismatched_number_of_truths(self):
+        """ Expect an exception when the number of truth values doesn't match the number of distributions """
+        truth = 2 * (np.random.uniform(size=(10,1)) - 0.5)
+        limits = [-2., 2.]
+        with self.assertRaises(ValueError) as context:
+            _ = calculate_brier(self.ens_n, truth, limits)
+
+        error_msg = "Number of distributions in the Ensemble"
+        self.assertTrue(error_msg in str(context.exception))
+
+    def test_calculate_brier_truth_outside_of_limits(self):
+        """ Expect an exception when a truth value is outside of the limits """
+        truth = 2 * (np.random.uniform(size=(10,1)) - 0.5)
+        truth = np.append(truth, 100)
+        limits = [-2., 2.]
+        with self.assertRaises(ValueError) as context:
+            _ = calculate_brier(self.ens_n, truth, limits)
+
+        error_msg = "Input truth values exceed the defined limits"
+        self.assertTrue(error_msg in str(context.exception))
+
+    def test_calculate_outlier_rate(self):
+        """ Base case test """
+        output = qp.metrics.calculate_outlier_rate(self.ens_n[0])
+        self.assertTrue(len(output) == 1)
+        self.assertTrue(np.isclose(output[0], 0.012436869911068668))
+
+    def test_calculate_outlier_rate_with_bounds(self):
+        """ Include min/max bounds for the outlier rate """
+        output = qp.metrics.calculate_outlier_rate(self.ens_n[0], -10, 10)
+        self.assertTrue(len(output) == 1)
+        self.assertTrue(np.isclose(output[0], 0))
+
+    def test_calculate_outlier_rate_many_distributions(self):
+        """ Check that the outlier rate is correctly calculated for an Ensemble with many distributions """
+        output = qp.metrics.calculate_outlier_rate(self.ens_n)
+        self.assertTrue(len(output) == 11)
+
+    def test_calculate_kolmogorov_smirnov(self):
+        """ Bare minimum test to ensure that the data is flowing correctly """
+        output = qp.metrics.calculate_kolmogorov_smirnov(self.ens_n, self.ens_n)
+        self.assertTrue(len(output) == self.ens_n.npdf)
+
+    def test_calculate_cramer_von_mises(self):
+        """ Bare minimum test to ensure that the data is flowing correctly """
+        output = qp.metrics.calculate_cramer_von_mises(self.ens_n, self.ens_n)
+        self.assertTrue(len(output) == self.ens_n.npdf)
+
+    def test_calculate_anderson_darling(self):
+        """ Bare minimum test to ensure that the data is flowing correctly """
+        output = qp.metrics.calculate_anderson_darling(self.ens_n, 'norm')
+        self.assertTrue(len(output) == self.ens_n.npdf)
+
+    def test_check_ensembles_are_same_size(self):
+        """ Test that no ValueError is raised when the ensembles are the same size """
+        try:
+            qp.metrics._check_ensembles_are_same_size(self.ens_n, self.ens_n_shift) #pylint: disable=W0212
+        except ValueError:
+            self.fail("Unexpectedly raised ValueError")
+
+    def test_check_ensembles_are_same_size_asserts(self):
+        """ Test that a ValueError is raised when the ensembles are not the same size """
+        with self.assertRaises(ValueError) as context:
+            qp.metrics._check_ensembles_are_same_size(self.ens_n, self.ens_n_plus_one) #pylint: disable=W0212
+
+        error_msg = "Input ensembles should have the same number of distributions"
+        self.assertTrue(error_msg in str(context.exception))
+
+    def test_check_ensemble_is_not_nested_with_flat_ensemble(self):
+        """ Test that no ValueError is raised when a flat Ensemble is passed in """
+        try:
+            qp.metrics._check_ensemble_is_not_nested(self.ens_n) #pylint: disable=W0212
+        except ValueError:
+            self.fail("Unexpectedly raised ValueError")
+
+    def test_check_ensemble_is_not_nested_with_nested_ensemble(self):
+        """ Test that a ValueError is raised when a nested Ensemble is passed in """
+        with self.assertRaises(ValueError) as context:
+            qp.metrics._check_ensemble_is_not_nested(self.ens_n_multi) #pylint: disable=W0212
+
+        error_msg = "Each element in the input Ensemble should be a single distribution."
+        self.assertTrue(error_msg in str(context.exception))
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/qp/test_pit.py b/tests/qp/test_pit.py
new file mode 100644
index 00000000..724f76f6
--- /dev/null
+++ b/tests/qp/test_pit.py
@@ -0,0 +1,71 @@
+import unittest
+import numpy as np
+import qp
+from qp import interp_gen
+from qp.ensemble import Ensemble
+from qp.metrics.pit import PIT
+
+
+# constants for tests
+NMAX = 2.5
+NPDF = 399
+ZGRID = np.linspace(0, NMAX, 301)
+
+ADVAL_ALL = 82.51480
+ADVAL_CUT = 1.10750
+CVMVAL = 20.63155
+KSVAL = 0.367384
+OUTRATE = 0.0
+
+# These constants are retained for future use
+# CDEVAL = -4.31200
+# SIGIQR = 0.0045947
+# BIAS = -0.00001576
+# SIGMAD = 0.0046489
+
+class PitTestCase(unittest.TestCase):
+    """ Test cases for the PIT metric. """
+
+    def setUp(self):
+        np.random.seed(87)
+        self.true_zs = np.random.uniform(high=NMAX, size=NPDF)
+
+        locs = np.expand_dims(self.true_zs + np.random.normal(0.0, 0.01, NPDF), -1)
+        scales = np.ones((NPDF, 1)) * 0.1 + np.random.uniform(size=(NPDF, 1)) * .05
+        self.n_ens = Ensemble(qp.stats.norm, data=dict(loc=locs, scale=scales))
+
+        self.grid_ens = self.n_ens.convert_to(interp_gen, xvals=ZGRID)
+
+    def test_pit_metrics(self):
+        """ Base test of PIT metric generation """
+        quant_grid = np.linspace(0, 1, 101)
+        pit_obj = PIT(self.grid_ens, self.true_zs, quant_grid)
+
+        pit_samples = pit_obj.pit_samps
+        self.assertTrue(len(pit_samples) == 399)
+
+        pit_ensemble = pit_obj.pit
+        self.assertTrue(pit_ensemble.npdf == 1)
+
+        meta_metrics = pit_obj.calculate_pit_meta_metrics()
+
+        ad_stat = meta_metrics['ad'].statistic
+        assert np.isclose(ad_stat, ADVAL_ALL)
+
+        cut_ad_stat = pit_obj.evaluate_PIT_anderson_ksamp(pit_min=0.6, pit_max=0.9).statistic
+        assert np.isclose(cut_ad_stat, ADVAL_CUT)
+
+        cvm_stat = meta_metrics['cvm'].statistic
+        assert np.isclose(cvm_stat, CVMVAL)
+
+        ks_stat = meta_metrics['ks'].statistic
+        assert np.isclose(ks_stat, KSVAL)
+
+        assert np.isclose(meta_metrics['outlier_rate'], OUTRATE)
+
+    def test_pit_metric_small_eval_grid(self):
+        """ Test the PIT warning message issued when the number of PIT samples is smaller than the evaluation grid """
+        with self.assertLogs(level='WARNING') as log:
+            quant_grid = np.linspace(0, 1, 1000)
+            _ = PIT(self.grid_ens, self.true_zs, quant_grid)
+            self.assertIn('Number of pit samples is smaller', log.output[0])
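Taken together, a rough sketch of the Ensemble-level API that these tests exercise (toy normal Ensemble as in the tests above; numerical outputs will vary with the inputs):

```python
import numpy as np
import qp

locs = np.linspace(-1.0, 1.0, 11).reshape(-1, 1)
scales = np.full((11, 1), 0.5)
ens = qp.Ensemble(qp.stats.norm, data=dict(loc=locs, scale=scales))

truth = np.linspace(-1.0, 1.0, 11)  # one true value per distribution
print(qp.metrics.calculate_brier(ens, truth, limits=(-2.0, 2.0)))
print(qp.metrics.calculate_outlier_rate(ens)[:3])
print(len(qp.metrics.calculate_kolmogorov_smirnov(ens, ens)))   # one result per pair
print(len(qp.metrics.calculate_anderson_darling(ens, 'norm')))  # one result per distribution
```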