From 497caf42bffcb14f19a46718eee2c9ceed54e697 Mon Sep 17 00:00:00 2001 From: Helge Date: Wed, 16 Jan 2019 13:37:46 +0100 Subject: [PATCH 01/12] ISSUE: The issue lay in loading data sources with meshed (2D) lat/lon coordinates. Cate attempts to convert these into 1D lat/lons, but requires coordinates with a uniform diff along a lat/lon coordinate axis (because it is a grid). The Seaice data does not conform to this requirement, which caused cate to throw a cryptic Exception. FIX: Cate now refuses to load such a data source and raises a ValidationError --- cate/core/opimpl.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/cate/core/opimpl.py b/cate/core/opimpl.py index cc9a3c753..b87862910 100644 --- a/cate/core/opimpl.py +++ b/cate/core/opimpl.py @@ -145,11 +145,15 @@ def _normalize_lat_lon_2d(ds: xr.Dataset) -> xr.Dataset: equal_lat = np.allclose(lat_data_1, lat_data_2, equal_nan=True) equal_lon = np.allclose(lon_data_1, lon_data_2, equal_nan=True) - if not (equal_lat and equal_lon): - return ds + + # Drop lat lon in any case. If note qual_lat and equal_lon subset_spatial_impl will subsequently + # fail with a ValidationError ds = ds.drop(['lon', 'lat']) + if not (equal_lat and equal_lon): + return ds + ds = ds.rename({ x_dim_name: 'lon', y_dim_name: 'lat', @@ -805,6 +809,16 @@ def subset_spatial_impl(ds: xr.Dataset, :param monitor: optional progress monitor :return: Subset dataset """ + + # Validate whether lat and lon exists. + + if not hasattr(ds, 'lon') or not hasattr(ds, 'lat'): + raise ValidationError('Cannot apply regional subset. No (valid) geocoding found.') + + if hasattr(ds, 'lon') and len(ds.lon.shape) != 1 \ + or hasattr(ds, 'lat') and len(ds.lat.shape) != 1: + raise ValidationError('Geocoding not recognised. 
Lat and/or lon variables have more than one dimension.') + monitor.start('Subset', 10) # Validate input try: From 89d7951504ddde227a07bf700a709e2ca8738a8b Mon Sep 17 00:00:00 2001 From: Helge Date: Wed, 16 Jan 2019 13:39:20 +0100 Subject: [PATCH 02/12] Extended subset tests to test code that resolved the below issue. ISSUE: The issue was lying in loading data sources with meshed (2D) lat/lon coordinates. Cate attempts to convert these into 1D lat/lons, but requires coordinates with a uniform diff along the a lat/lon coordinate axis (because it is a grid). The Seaice data does not conform to this requirement which caused cate to throw a cryptic Exception. FIX: Cate refuses now loading such a data source throwing a ValidationError --- test/ops/test_subset.py | 43 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/test/ops/test_subset.py b/test/ops/test_subset.py index f5a04f8e6..dbbd42607 100644 --- a/test/ops/test_subset.py +++ b/test/ops/test_subset.py @@ -6,8 +6,11 @@ import numpy as np import xarray as xr +import pandas as pd from cate.core.op import OP_REGISTRY +from cate.core.opimpl import subset_spatial_impl +from cate.core.types import ValidationError from cate.ops import subset from cate.util.misc import object_to_qualified_name @@ -19,7 +22,44 @@ def assert_dataset_equal(expected, actual): assert expected.equals(actual), (expected, actual) +def get_test_subset_non_valid_lat_lon_dataset(): + temp = np.random.randn(2, 2, 3) + precip = np.random.rand(2, 2, 3) + lon = [[-40, 40], [-40, 40]] + lat = [[-50, 50], [-50, 50]] + return xr.Dataset({'temp': (['x', 'y', 'time'], temp), + 'precip': (['x', 'y', 'time'], precip)}, + coords={'lon': (['x', 'y'], lon), + 'lat': (['x', 'y'], lat), + 'time': pd.date_range('2014-09-06', periods=3)}) + + class TestSubsetSpatial(TestCase): + def test_subset_non_valid_lat_lon(self): + """ + Test whether lat and/or lon exist and if they exist whether they have dimension = 1 + 
:return: void + """ + + # Test whether lat lon exist + dataset = xr.Dataset({ + 'first': (['xc', 'yc', 'time'], np.ones([180, 360, 6])), + 'second': (['xc', 'yc', 'time'], np.ones([180, 360, 6])), + 'xc': np.linspace(-89.5, 89.5, 180), + 'yc': np.linspace(-179.5, 179.5, 360), + }) + + with self.assertRaises(ValidationError) as error: + subset_spatial_impl(dataset, (-40, 40, -50, 50)) + self.assertIn('No geocoding found', str(error.exception)) + + # test whether lat lon has the wrong dimension (!=1) + dataset = get_test_subset_non_valid_lat_lon_dataset() + + with self.assertRaises(ValidationError) as error: + subset_spatial_impl(dataset, (-40, 40, -50, 50)) + self.assertIn('Geocoding not recognised', str(error.exception)) + def test_nominal(self): """ Test general 'most expected' use case functionality. @@ -413,7 +453,6 @@ def test_non_geospatial_variable(self): actual = subset.subset_spatial(dataset, poly, mask=True) xr.testing.assert_equal(expected.third, actual.third) - class TestSubsetTemporal(TestCase): def test_subset_temporal(self): # Test general functionality @@ -469,7 +508,6 @@ def test_registered(self): 'time': [datetime(2000, x, 1) for x in range(2, 5)]}) assert_dataset_equal(expected, actual) - class TestSubsetTemporalIndex(TestCase): def test_subset_temporal_index(self): # Test general functionality @@ -518,7 +556,6 @@ def test_registered(self): 'time': ['2000-03-01', '2000-04-01', '2000-05-01']}) assert_dataset_equal(expected, actual) - class TestExtractPoint(TestCase): @classmethod def setUpClass(cls): From e4ce9d804051f7f296bc665078a2871e266f6720 Mon Sep 17 00:00:00 2001 From: Helge Date: Wed, 16 Jan 2019 13:44:05 +0100 Subject: [PATCH 03/12] Change entry to below issue fix. ISSUE: The issue was lying in loading data sources with meshed (2D) lat/lon coordinates. Cate attempts to convert these into 1D lat/lons, but requires coordinates with a uniform diff along the a lat/lon coordinate axis (because it is a grid). 
The Seaice data does not conform to this requirement, which caused cate to throw a cryptic Exception. FIX: Cate now refuses to load such a data source and raises a ValidationError --- CHANGES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index fe5b9a24c..bc59e4cb7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -251,6 +251,8 @@ Skipped. ### Fixes +* Loading SeaIce data throws a ValueError: The truth value of an array with more than one element is ambiguous. + [#832](https://github.com/CCI-Tools/cate/issues/832) * Be tolerant of "invalid" geometries passed to operations expecting polygon WKT values [#506](https://github.com/CCI-Tools/cate/issues/506) From 895c993775cb483788a1bc9138a2f9547641ac08 Mon Sep 17 00:00:00 2001 From: Helge Date: Wed, 16 Jan 2019 13:47:22 +0100 Subject: [PATCH 04/12] Moved the change entry to the latest version section --- CHANGES.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index bc59e4cb7..fc456f13d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,7 @@ ## Version 2.0.0.dev24 (in development) +* Loading SeaIce data throws a ValueError: The truth value of an array with more than one element is ambiguous. + [#832](https://github.com/CCI-Tools/cate/issues/832) * A set of related data access issues that all occurred if data was downloaded from ODP using a spatial subset has hopefully been addressed in one go. Issues include: - Cannot download SST dataset - I/O error. [#823](https://github.com/CCI-Tools/cate/issues/823) @@ -251,8 +253,6 @@ Skipped. ### Fixes -* Loading SeaIce data throws a ValueError: The truth value of an array with more than one element is ambiguous. 
- [#832](https://github.com/CCI-Tools/cate/issues/832) * Be tolerant of "invalid" geometries passed to operations expecting polygon WKT values [#506](https://github.com/CCI-Tools/cate/issues/506) From 47d4eee749e567ae02291ece3d77e6542ba8662d Mon Sep 17 00:00:00 2001 From: Helge Date: Fri, 18 Jan 2019 09:47:57 +0100 Subject: [PATCH 05/12] - Test failed because I added '(valid)' to the error message in subset_spatial_impl. Added the missing '(valid)' to the message expected by the test - Reformatted test_subset.py to comply with PEP8 --- test/ops/test_subset.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/test/ops/test_subset.py b/test/ops/test_subset.py index dbbd42607..f6e5e5005 100644 --- a/test/ops/test_subset.py +++ b/test/ops/test_subset.py @@ -28,10 +28,10 @@ def get_test_subset_non_valid_lat_lon_dataset(): lon = [[-40, 40], [-40, 40]] lat = [[-50, 50], [-50, 50]] return xr.Dataset({'temp': (['x', 'y', 'time'], temp), - 'precip': (['x', 'y', 'time'], precip)}, - coords={'lon': (['x', 'y'], lon), - 'lat': (['x', 'y'], lat), - 'time': pd.date_range('2014-09-06', periods=3)}) + 'precip': (['x', 'y', 'time'], precip)}, + coords={'lon': (['x', 'y'], lon), + 'lat': (['x', 'y'], lat), + 'time': pd.date_range('2014-09-06', periods=3)}) class TestSubsetSpatial(TestCase): @@ -51,7 +51,7 @@ def test_subset_non_valid_lat_lon(self): with self.assertRaises(ValidationError) as error: subset_spatial_impl(dataset, (-40, 40, -50, 50)) - self.assertIn('No geocoding found', str(error.exception)) + self.assertIn('No (valid) geocoding found', str(error.exception)) # test whether lat lon has the wrong dimension (!=1) dataset = get_test_subset_non_valid_lat_lon_dataset() @@ -453,6 +453,7 @@ def test_non_geospatial_variable(self): actual = subset.subset_spatial(dataset, poly, mask=True) xr.testing.assert_equal(expected.third, actual.third) + class TestSubsetTemporal(TestCase): def test_subset_temporal(self): # Test general functionality @@ -508,6 +509,7 @@ def 
test_registered(self): 'time': [datetime(2000, x, 1) for x in range(2, 5)]}) assert_dataset_equal(expected, actual) + class TestSubsetTemporalIndex(TestCase): def test_subset_temporal_index(self): # Test general functionality @@ -556,6 +558,7 @@ def test_registered(self): 'time': ['2000-03-01', '2000-04-01', '2000-05-01']}) assert_dataset_equal(expected, actual) + class TestExtractPoint(TestCase): @classmethod def setUpClass(cls): From 4284d0586d7c3767768939595553ed61e2695183 Mon Sep 17 00:00:00 2001 From: Helge Date: Fri, 18 Jan 2019 10:01:16 +0100 Subject: [PATCH 06/12] Reformatted various files to comply with PEP8 --- cate/core/ds.py | 2 +- cate/core/op.py | 4 ++-- cate/core/workflow.py | 4 ++-- cate/core/wsmanag.py | 2 +- cate/ops/animate.py | 2 +- cate/util/cache.py | 2 +- cate/util/misc.py | 2 +- cate/util/process.py | 4 ++-- cate/util/web/webapi.py | 2 +- cate/webapi/rest.py | 6 +++--- 10 files changed, 15 insertions(+), 15 deletions(-) diff --git a/cate/core/ds.py b/cate/core/ds.py index 475eb47f4..e8e97e571 100644 --- a/cate/core/ds.py +++ b/cate/core/ds.py @@ -618,7 +618,7 @@ def open_xarray_dataset(paths, e.g. the whole array in the file. Otherwise smaller dask chunks will be used to split the dataset. - :param paths: Either a string glob in the form "path/to/my/files/\*.nc" or an explicit + :param paths: Either a string glob in the form "path/to/my/files/\\*.nc" or an explicit list of files to open. :param region: Optional region constraint. :param var_names: Optional variable names constraint. diff --git a/cate/core/op.py b/cate/core/op.py index 9ffe2d867..afb292b10 100644 --- a/cate/core/op.py +++ b/cate/core/op.py @@ -698,12 +698,12 @@ def new_subprocess_op(op_meta_info: OpMetaInfo, and returns a tuple (label, total_work) or a regex that must match in order to signal the start of progress monitoring. The regex must provide the group names "label" or "total_work" or both, - e.g. "(?P