address catalog binning issue and minor hotfix for catalog evaluations (#89)

wsavran · web-flow · commit c6f01f3532f7 · 2021-03-23T12:56:59.000-07:00
* fixes issue #86 and adds more stringent testing for spatial regions - allow calibration tests to properly handle lists of evaluation results that contain not-valid test results - change from numpy.testing.assert_allclose() to numpy.testing.assert_array_equal() - add cleaner_range() function to generate the nodes of a Cartesian grid - add unit tests for cleaner_range() - update CartesianGrid2D region to use cleaner_range() to generate the bbox grid - add test using the example earthquakes listed in #86
diff --git a/csep/core/catalog_evaluations.py b/csep/core/catalog_evaluations.py
@@ -124,7 +124,6 @@ def spatial_test(forecast, observed_catalog):
 
     return result
 
-
 def magnitude_test(forecast, observed_catalog):
     """ Performs magnitude test for catalog-based forecasts """
     test_distribution = []
@@ -292,7 +291,13 @@ def calibration_test(evaluation_results, delta_1=False):
 
     # this is using "delta_2" which is the cdf value less-equal
     idx = 0 if delta_1 else 1
-    quantiles = [result.quantile[idx] for result in evaluation_results]
+    quantiles = []
+    for result in evaluation_results:
+        if result.status == 'not-valid':
+            print(f'evaluation not valid for {result.name}. skipping in calibration test.')
+        else:
+            quantiles.append(result.quantile[idx])
+
     ks, p_value = scipy.stats.kstest(quantiles, 'uniform')
 
     result = CalibrationTestResult(
diff --git a/csep/core/forecasts.py b/csep/core/forecasts.py
@@ -217,6 +217,7 @@ def get_magnitude_index(self, mags, tol=0.00001):
             raise ValueError("mags outside the range of forecast magnitudes.")
         return idm
 
+
 class GriddedForecast(MarkedGriddedDataSet):
     """ Class to represent grid-based forecasts """
 
diff --git a/csep/core/regions.py b/csep/core/regions.py
@@ -11,7 +11,7 @@
 import pyproj
 
 # PyCSEP imports
-from csep.utils.calc import bin1d_vec
+from csep.utils.calc import bin1d_vec, cleaner_range
 from csep.utils.scaling_relationships import WellsAndCoppersmith
 
 def california_relm_collection_region(dh_scale=1, magnitudes=None, name="relm-california-collection"):
@@ -172,14 +172,9 @@ def global_region(dh=0.1, name="global", magnitudes=None):
         csep.utils.CartesianGrid2D:
     """
     # generate latitudes
-    const = 1000000
-    start_lat = numpy.floor(-90 * const)
-    end_lat = numpy.floor(90 * const)
-    start_lon = numpy.floor(-180 * const)
-    end_lon = numpy.floor(180 * const)
-    d = numpy.floor(const * dh)
-    lats = numpy.arange(start_lat, end_lat, d) / const
-    lons = numpy.arange(start_lon, end_lon, d) / const
+
+    lons = cleaner_range(-180.0, 179.9, dh)
+    lats = cleaner_range(-90, 89.9, dh)
     coords = itertools.product(lons,lats)
     region = CartesianGrid2D([Polygon(bbox) for bbox in compute_vertices(coords, dh)], dh, name=name)
     if magnitudes is not None:
@@ -707,13 +702,9 @@ def _build_bitmask_vec(self):
         # get midpoints for hashing
         midpoints = numpy.array([poly.centroid() for poly in self.polygons])
 
-        # compute nx and ny
-        nx = numpy.rint((bbox[1][0] - bbox[0][0]) / self.dh)
-        ny = numpy.rint((bbox[1][1] - bbox[0][1]) / self.dh)
-
-        # set up grid of bounding box
-        xs = self.dh * numpy.arange(nx + 1) + bbox[0][0]
-        ys = self.dh * numpy.arange(ny + 1) + bbox[0][1]
+        # set up grid over bounding box
+        xs = cleaner_range(bbox[0][0], bbox[1][0], self.dh)
+        ys = cleaner_range(bbox[0][1], bbox[1][1], self.dh)
 
         # set up mask array, 1 is index 0 is mask
         a = numpy.ones([len(ys), len(xs), 2])
diff --git a/csep/utils/calc.py b/csep/utils/calc.py
@@ -192,4 +192,25 @@ def _distribution_test(stochastic_event_set_data, observation_data):
     # score evaluation
     _, quantile = get_quantiles(test_distribution, d_obs)
 
-    return test_distribution, d_obs, quantile
+    return test_distribution, d_obs, quantile
+
+def cleaner_range(start, end, h):
+    """ Returns array holding bin edges that doesn't contain floating point wander.
+
+    Floating point wander can occur when repeatedly adding floating point numbers together. The errors propagate and become worse over the sum. This function generates the
+    values on an integer grid and converts back to floating point numbers through multiplication.
+
+     Args:
+        start (float)
+        end (float)
+        h (float): spacing between bin edges
+
+    Returns:
+        bin_edges (numpy.ndarray)
+    """
+    # convert to integers to prevent accumulating floating point errors
+    const = 100000
+    start = numpy.floor(const * start)
+    end = numpy.floor(const * end)
+    d = const * h
+    return numpy.arange(start, end + d / 2, d) / const
diff --git a/run_tests.sh b/run_tests.sh
@@ -1,2 +1,2 @@
 #!/usr/bin/env bash
-pytest
+pytest -v
diff --git a/tests/test_calc.py b/tests/test_calc.py
@@ -1,5 +1,30 @@
 import unittest
-from csep.utils.calc import bin1d_vec
+import numpy
+from csep.utils.calc import bin1d_vec, cleaner_range
+
+
+class TestCleanerRange(unittest.TestCase):
+
+    def setUp(self):
+
+        self.start = 0.0
+        self.end = 0.9
+        self.dh = 0.1
+        self.truth = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+
+    def test_discrepancy_with_arange_catch_failure(self):
+
+        ar = numpy.arange(self.start, self.end + self.dh / 2, self.dh)
+        cr = cleaner_range(self.start, self.end, self.dh)
+
+        self.assertRaises(AssertionError, numpy.testing.assert_array_equal, ar, cr)
+        self.assertRaises(AssertionError, numpy.testing.assert_array_equal, ar, self.truth)
+
+
+    def test_discrepancy_with_direct_input(self):
+
+        cr = cleaner_range(self.start, self.end, self.dh)
+        numpy.testing.assert_array_equal(self.truth, cr)
 
 class TestBin1d(unittest.TestCase):
 
diff --git a/tests/test_spatial.py b/tests/test_spatial.py
@@ -5,7 +5,7 @@
 import numpy
 
 from csep.core.regions import CartesianGrid2D, compute_vertex, compute_vertices, _bin_catalog_spatio_magnitude_counts, \
-    _bin_catalog_spatial_counts, _bin_catalog_probability, Polygon
+    _bin_catalog_spatial_counts, _bin_catalog_probability, Polygon, global_region
 
 
 class TestPolygon(unittest.TestCase):
@@ -63,27 +63,31 @@ def test_object_creation(self):
         self.assertEqual(self.cart_grid.num_nodes, self.num_nodes, 'num nodes is not correct')
 
     def test_xs_and_xy_correct(self):
-        numpy.testing.assert_allclose(self.cart_grid.xs, numpy.arange(0,self.nx)*self.dh)
-        numpy.testing.assert_allclose(self.cart_grid.ys, numpy.arange(0,self.ny)*self.dh)
+
+        test_xs = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
+        test_ys = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+
+        numpy.testing.assert_array_equal(self.cart_grid.xs, test_xs)
+        numpy.testing.assert_array_equal(self.cart_grid.ys, test_ys)
 
     def test_bitmask_indices_mapping(self):
         test_idx = self.cart_grid.idx_map[1,0]
-        numpy.testing.assert_allclose(test_idx, 0, err_msg='mapping for first polygon index (good) not correct')
+        numpy.testing.assert_array_equal(test_idx, 0, err_msg='mapping for first polygon index (good) not correct')
 
         test_idx = self.cart_grid.idx_map[0,1]
-        numpy.testing.assert_allclose(test_idx, 9, err_msg='mapping for polygon (good) not correct.')
+        numpy.testing.assert_array_equal(test_idx, 9, err_msg='mapping for polygon (good) not correct.')
 
         test_idx = self.cart_grid.idx_map[2,0]
-        numpy.testing.assert_allclose(test_idx, 1, err_msg='mapping for polygon (good) not correct.')
+        numpy.testing.assert_array_equal(test_idx, 1, err_msg='mapping for polygon (good) not correct.')
 
         test_idx = self.cart_grid.idx_map[0,2]
-        numpy.testing.assert_allclose(test_idx, 19, err_msg='mapping for polygon (good) not correct.')
+        numpy.testing.assert_array_equal(test_idx, 19, err_msg='mapping for polygon (good) not correct.')
 
         test_idx = self.cart_grid.idx_map[-1,-1]
-        numpy.testing.assert_allclose(test_idx, numpy.nan, err_msg='mapping for last index (bad) not correct.')
+        numpy.testing.assert_array_equal(test_idx, numpy.nan, err_msg='mapping for last index (bad) not correct.')
 
         test_idx = self.cart_grid.idx_map[0,0]
-        numpy.testing.assert_allclose(test_idx, numpy.nan, err_msg='mapping for first index (bad) not correct.')
+        numpy.testing.assert_array_equal(test_idx, numpy.nan, err_msg='mapping for first index (bad) not correct.')
 
     def test_domain_mask(self):
         test_flag = self.cart_grid.bbox_mask[0, 0]
@@ -222,5 +226,24 @@ def test_bin_spatial_magnitudes(self):
         self.assertEqual(test_result[0, 1], 1)
         self.assertEqual(test_result[9, 0], 1)
 
+
+    def test_global_region_binning(self):
+
+        gr = global_region()
+
+        # test points
+        lons = numpy.array([-178.6, -178.6, -178.02, -177.73, -177.79])
+        lats = numpy.array([-15.88, -51.75, -30.61, -29.98, -30.6])
+
+        # directly compute the indexes from the region object
+        idxs = gr.get_index_of(lons, lats)
+        for i, idx in enumerate(idxs):
+            found_poly = gr.polygons[idx]
+            lon = lons[i]
+            lat = lats[i]
+
+            assert lon >= found_poly.points[1][0] and lon < found_poly.points[2][0]
+            assert lat >= found_poly.points[0][1] and lat < found_poly.points[2][1]
+
 if __name__ == '__main__':
     unittest.main()

Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`	`1`	`#!/usr/bin/env bash`
`2`		`-pytest`
	`2`	`+pytest -v`