Merge pull request #132 from FESOM/suvi_accessor_selections

Suvi accessor selections
FESOM · Mar 23, 2021 · a2951ce · a2951ce
2 parents 076ae42 + 400887b
commit a2951ce
Show file tree

Hide file tree

Showing 9 changed files with 14,252 additions and 1 deletion.
diff --git a/notebooks/accessor_selections.ipynb b/notebooks/accessor_selections.ipynb
diff --git a/notebooks/images/advanced_selection.png b/notebooks/images/advanced_selection.png
diff --git a/notebooks/images/point_selection.png b/notebooks/images/point_selection.png
diff --git a/notebooks/images/region_selection.png b/notebooks/images/region_selection.png
diff --git a/notebooks/images/trajectory_selection.png b/notebooks/images/trajectory_selection.png
diff --git a/pyfesom2/__init__.py b/pyfesom2/__init__.py
@@ -18,3 +18,5 @@
 from .diagnostics import *
 from .ut import *
 from .fesom2GeoFormat import *
+from .datasets import open_dataset
+from .accessor import FESOMDataArray as _FESOMDataArray
diff --git a/pyfesom2/accessor.py b/pyfesom2/accessor.py
diff --git a/pyfesom2/datasets.py b/pyfesom2/datasets.py
@@ -37,7 +37,7 @@
     "CORE"   : {
         "path_url"   : "https://swift.dkrz.de/v1/dkrz_02942825-0cab-44f3-ad37-80fd5d2e37e3/FESOM2_data/LCORE",
         "Dataset URL": "https://swiftbrowser.dkrz.de/public/dkrz_02942825-0cab-44f3-ad37-80fd5d2e37e3/FESOM2_data/LCORE",
-        "var_list"   : ["temp", "salt", "a_ice", "m_ice", "ssh", "sst"]},
+        "var_list"   : ["temp", "salt", "a_ice", "m_ice", "ssh", "sst", "mesh"]},
     "pi-grid": {
         'path_url'   : "https://swift.dkrz.de/v1/dkrz_035d8f6ff058403bb42f8302e6badfbc/pyfesom2/tutorial/pi-grid",
         "var_list"   : ['a_ice', 'm_ice', 'temp', 'u', 'v', 'w', 'mesh'],

diff --git a/tests/test_accessor.py b/tests/test_accessor.py
@@ -0,0 +1,372 @@
+import numpy as np
+import pytest
+from shapely.geometry import box, Polygon
+
+from pyfesom2.datasets import open_dataset
+
+
+# TODO: push this to test config conftest.py so that it is available globally
+@pytest.fixture(scope='module')
+def local_dataset(request):
+    import os.path
+    cur_dir = os.path.dirname(request.fspath)
+    data_path = os.path.join(cur_dir, "data", "pi-results", "temp.fesom.*.nc")
+    mesh_path = os.path.join(cur_dir, "data", "pi-grid")
+    da = open_dataset(data_path, mesh_path=mesh_path)
+    yield da
+
+
+# use scope=function if we want to change grid size dynamically for each test case,
+# but merged dataset then needs to be changed appropriately to take the param.
+@pytest.fixture(scope='module', params=[36 * 18, 360 * 180])  # same pts as 10deg, 1deg reg grid
+def random_spatial_dataset(request):
+    from matplotlib.tri import Triangulation
+    import xarray as xr
+
+    if request.param is not None:
+        spatial_size = request.param
+    else:
+        spatial_size = 36 * 18  # default, it covers more edge cases
+
+    lons = np.random.uniform(-180., 180., spatial_size)
+    lats = np.random.uniform(-90., 90., spatial_size)
+    tris = Triangulation(lons, lats)
+    dataset = xr.Dataset(coords={'lon'  : ('nod2', lons),
+                                 'lat'  : ('nod2', lats),
+                                 'faces': (('nelem', 'three'), tris.triangles)})
+    dataset['dummy_2d_var'] = ('nod2', np.random.uniform(0., 1., spatial_size))
+
+    yield dataset
+
+
+@pytest.fixture
+def random_nd_dataset(random_spatial_dataset):
+    """time(12), level(40), parametrized nod2 dataset"""
+    import pandas as pd
+    times = pd.date_range('2019-01-01', freq='m', periods=12)
+    nz1 = np.linspace(0, -5000, 40)
+    dataset = random_spatial_dataset.assign_coords({'time': times, 'nz1': nz1})
+    dummy_nd_var = np.random.uniform(0., 1., (len(times), len(random_spatial_dataset.nod2), len(nz1)))
+    dataset['dummy_nd_var'] = (('time', 'nod2', 'nz1'), dummy_nd_var)
+    yield dataset
+
+
+# merged dataset fixture using just 2 for now, in future add remote too
+@pytest.fixture(params=["local_dataset", "random_spatial_dataset"])
+def dataset(local_dataset, random_spatial_dataset, request):
+    yield request.getfixturevalue(request.param)
+
+
+@pytest.fixture(scope='module')
+def five_point_dataset():
+    """
+    A five point dataset defined at four corners of -180,-90 -> 180,-90 -> 180, 90 -> 0,0 -> -180,90,
+    four corners of global map and center. This gives a very controlled fesom-like-triangular mesh with predictable
+    triangulation (in ccw): [[0,1,3],[1,2,3], [2,4,3], [4,0,3]]. This makes testing of point selection, region
+    selections on both nodes and faces easy. This can also be used to test interpolations.
+    in practice use nodes slightly inside domain adjusted by 1 deg, to accommodate shapely's contains.
+    """
+    import xarray as xr
+    lons = [-180., 180., 180., 0., -180.]
+    lats = [-90., -90., 90., 0., 90.]
+    faces = np.array([[0, 1, 3],
+                      [1, 2, 3],
+                      [2, 4, 3],
+                      [4, 0, 3]])  # no of faces = 2*nodes - 2*boundary nodes -2
+    dataset = xr.Dataset(coords={'lon'  : ('nod2', lons),
+                                 'lat'  : ('nod2', lats),
+                                 'faces': (('nelem', 'three'), faces)}
+                         )
+    dataset['dummy_2d_var'] = ('nod2', np.random.uniform(0., 1., len(lons)))
+    yield dataset
+
+
+# TODO: skipping because remote datasets need to be fixed for plotting and optimized for remote transfer:
+#     current unsorted points leads to too much data transfer, atleast when remote dataset contains faces,
+#     integrate this in dataset fixture as a param and mark it slow
+# @pytest.fixture(scope='module', params=[LCORE, A01])
+# def lcore_dataset(request):
+#     da = request.param.load()
+#     yield da
+
+# Test selection utils
+
+def test_normalize_distances():
+    from pyfesom2.accessor import normalize_distance
+
+    dists_lt_1km = np.linspace(0, 999, 10)
+    dists_gt_1km = np.linspace(1000, 10000, 2)
+    units, dist = normalize_distance(np.hstack([dists_lt_1km, dists_gt_1km]))
+    assert units == 'm'
+    assert ~np.any(np.isnan(dist)), 'NaNs in dists should not be possible.'
+
+    dists_lt_1km = np.linspace(0, 999, 2)
+    dists_gt_1km = np.linspace(1000, 10000, 10)
+    units, dist = normalize_distance(np.hstack([dists_lt_1km, dists_gt_1km]))
+    assert units == 'km'
+    assert ~np.any(np.isnan(dist)), 'NaNs in dists should not be possible.'
+
+
+# Test selection methods
+
+bbox_tests = [
+    (-10, 50, 60, 80),  # LL -> UR (minx, miny, maxx, maxy)
+    # use bad values and check
+    # bounds smaller then grid size
+    # invalid bounds.
+]
+
+
+@pytest.mark.parametrize("bbox", bbox_tests)
+def test_select_bbox(dataset, bbox):
+    from pyfesom2.accessor import select_bbox
+
+    # selection on datasets
+    sda = select_bbox(dataset, bbox)
+    slon_min, slat_min, slon_max, slat_max = (sda.lon.min(), sda.lat.min(),
+                                              sda.lon.max(), sda.lon.max())
+    # check within bbox's bounds
+    # because pyfesom2.ut's cut_region uses closed bounds and sel is only when all nodes
+    # of triangle are in bounds.
+
+    assert slon_min >= bbox[0] and slon_max <= bbox[2]
+    assert slat_min >= bbox[1] and slat_max <= bbox[3]
+
+    # selection on data arrays
+    data_var = list(dataset.data_vars.keys())[0]
+    with pytest.raises(ValueError):
+        select_bbox(dataset[data_var], bbox)
+
+    sda = select_bbox(dataset[data_var], bbox, faces=dataset.faces)
+    slon_min, slat_min, slon_max, slat_max = (sda.lon.min(), sda.lat.min(),
+                                              sda.lon.max(), sda.lon.max())
+    assert slon_min >= bbox[0] and slon_max <= bbox[2]
+    assert slat_min >= bbox[1] and slat_max <= bbox[3]
+
+
+region_tests = [
+    *bbox_tests,  # bbox tests should be valid inputs for select_region
+    *[box(*bbox) for bbox in bbox_tests],  # bbox tests cst s shapely's box
+    Polygon([(-70, 30), (-10, 0), (-10, 60)]),  # a triangle in atlantic
+    # in future add complex region
+    # polygons from pyfesom's ut
+]
+
+
+@pytest.mark.parametrize("region", region_tests)
+def test_select_region(dataset, region):
+    from typing import Sequence
+    import warnings
+    from shapely.geometry import MultiPoint
+    from pyfesom2.accessor import select_region
+    import xarray as xr
+    #     # convex hull of returned dataset
+    if isinstance(region, Sequence):
+        outer_polygon = box(*region)
+    else:
+        outer_polygon = region
+
+    sda = select_region(dataset, region)
+
+    if len(sda.lon) == 0:
+        with pytest.warns(Warning):
+            warnings.warn("No points in domain are within region, returning original data.", Warning)
+    else:
+        mp = MultiPoint(np.vstack((sda.lon, sda.lat)).T)
+        assert outer_polygon.contains(mp.convex_hull)
+
+    # check passing dataarray
+    test_data_var_name = list(dataset.data_vars.keys())[0]
+    test_data_array = dataset[test_data_var_name]
+    with pytest.raises(ValueError):
+        # region selection is not done when faces are not present
+        # dataarrays cannot contain faces so ValueError
+        sda = select_region(test_data_array, region)
+
+    sda = select_region(test_data_array, region, faces=dataset.faces)
+    # returned region selection on data array is a dataset
+    assert isinstance(sda, xr.Dataset)
+
+
+@pytest.mark.parametrize("npoints", [10])
+def test_select_points(dataset, npoints, request):
+    from pyfesom2.accessor import select_points
+    # dataset = local_dataset # makes it easy to change fixture
+    # select random points from datapoints
+    random_pts = np.random.choice(len(dataset.nod2), npoints)  # replace=True by default
+    lats = dataset.lat[random_pts]
+    lons = dataset.lon[random_pts]
+
+    # pass lons and lats as numpy array
+    sda = select_points(dataset, lons.values, lats.values)
+    assert np.array_equal(sda.lon.values, lons.values) & np.array_equal(sda.lat.values, lats.values)
+
+    # test passing lon and lat as scalars
+    slon, slat = np.array(lons.values[0], ndmin=1), np.array(lats.values[0], ndmin=1)  # to match shapes
+    sda = select_points(dataset, float(slon), float(slat))
+    assert np.array_equal(sda.lon.values, slon) & np.array_equal(sda.lat.values, slat)
+
+    # selection on data arrays
+    data_var = list(dataset.data_vars.keys())[0]
+    sda = select_points(dataset[data_var], lons, lats)
+    assert np.array_equal(sda.lon.values, lons) & np.array_equal(sda.lat.values, lats)
+
+    # pass lons and lats as xarray data array
+    sda = select_points(dataset, lons, lats)
+    assert np.array_equal(sda.lon.values, lons) & np.array_equal(sda.lat.values, lats)
+
+    # check other attrs
+    assert 'nod2' in sda.dims, 'point selection will always have points in dimensions'
+    assert len(sda.nod2) == npoints, 'length of selected points should be same as lats, lons'
+    assert 'faces' not in sda, 'faces are not returned by select points'
+
+    assert 'distance' in sda.coords, 'by default select points returns distance as coordinate'
+    assert np.isclose(sda.distance[0], 0.0), ' distance to first point is always 0.'
+
+    sda = select_points(dataset, lons, lats, return_distance=False)
+    assert 'distance' not in sda, ' point selection with return_distance=False does not have distance'
+
+    # test against points not necessarily in dataset
+    npoints = 20
+    lons = np.linspace(-180, 180, npoints)
+    lats = np.linspace(-90, 90, npoints)
+    sda = select_points(dataset, lons, lats)
+    assert len(sda.nod2) == npoints
+
+
+@pytest.mark.parametrize("npoints", [10])
+def test_select_points_advanced(random_nd_dataset, npoints):
+    """Test trajectory like selection on time, level dimensions"""
+    import pandas as pd
+    from pyfesom2.accessor import select_points
+
+    dataset = random_nd_dataset
+    random_pts = np.random.choice(len(dataset.nod2), npoints)  # replace=True by default
+    lats = dataset.lat[random_pts].values
+    lons = dataset.lon[random_pts].values
+    nz1 = np.random.choice(dataset.nz1.values, npoints)
+    # not linearly increasing times are awkward for trajectory
+    linear_times = pd.date_range(dataset.time.min().values, dataset.time.max().values, periods=npoints)
+
+    # checking if dimension name of selection can be changed
+    sda = select_points(dataset, lon=lons, lat=lats, time=linear_times, nz1=nz1, selection_dim_name='points')
+    assert "points" in sda.dims
+
+    assert len(sda.points) == npoints
+    assert len(sda.lon) == len(sda.lat) == len(sda.nz1) == len(sda.time)
+    assert all([coord in sda.coords for coord in ['lon', 'lat', 'time', 'nz1']])
+    assert not all([dim in sda.dims for dim in ('time', 'nz1')])
+
+
+def test_selection_of_faces(five_point_dataset):
+    from shapely.geometry import box
+    from pyfesom2.accessor import select_region
+    dataset = five_point_dataset
+    region = [0., -90., 180., 90.]  # right half
+    sh_region = box(*region).buffer(1e-6)
+    sel_da = select_region(dataset, region=sh_region)
+
+    assert len(sel_da.nod2) == 3  # 3 points
+    assert sel_da.faces.shape == (1, 3)  # 1 triangle
+    assert np.all(np.isin([0., 180.], sel_da.lon))
+    assert np.all(np.isin([-90., 90., 0.], sel_da.lat))
+
+
+def test_select(random_nd_dataset):
+    dataset = random_nd_dataset
+    npoints = 20
+    lons = np.linspace(-180, 180, npoints)
+    lats = np.linspace(-90, 90, npoints)
+    # test passing slices of other dims
+    sda = dataset.pyfesom2.select(lon=lons, lat=lats, time=slice('2019-05-01', '2019-12-31'), nz1=slice(0, -1000))
+    assert "time" in sda.dims
+    assert "nz1" in sda.dims
+
+    # check exceptions
+    region = region_tests[0]
+    with pytest.raises(ValueError):
+        sda = dataset.pyfesom2.select(lon=lons, lat=lats, region=region)
+
+    with pytest.raises(NotImplementedError):
+        sda = dataset.pyfesom2.select(lon=lons, lat=lats, method='linear')
+
+    with pytest.raises(ValueError):
+        sda = dataset.pyfesom2.select(lon=lons)
+
+    # path cannot have more then 2 tuples
+    with pytest.raises(ValueError):
+        sda = dataset.pyfesom2.select(path=(lons, lats, dataset.nz1.values))
+    with pytest.raises(ValueError):
+        sda = dataset.pyfesom2.select(path=lons)
+
+
+# Test methods on accessors
+
+def test_dataset_accessor_attrs(dataset):
+    assert hasattr(dataset, "pyfesom2")
+
+    dataset_methods = ["select", "select_points", "__repr__", "_repr_html_"]  # , "plot", "triplot"]
+    for method in dataset_methods:
+        assert hasattr(dataset.pyfesom2, method)
+    # check if reprs are same as xarray dataset
+    assert dataset.__repr__() == dataset.pyfesom2.__repr__()
+    assert dataset.pyfesom2._repr_html_() is not None
+
+
+def test_dataarray_accessor_attrs(dataset):
+    """Test dataarray methods on a data variable"""
+    test_dataarray = list(dataset.data_vars.keys())[0]
+    dataarray_methods = ["select", "select_points", "__repr__", "_repr_html_"]  # , "plot", "triplot"]
+
+    for method in dataarray_methods:
+        assert hasattr(dataset.pyfesom2, method)
+
+    # check if reprs exist
+    assert getattr(dataset.pyfesom2, test_dataarray).__repr__().startswith("Wrapped")
+    assert getattr(dataset.pyfesom2, test_dataarray)._repr_html_() is not None
+
+
+def test_dataset_accessor_methods(random_nd_dataset):
+    import xarray as xr
+    dataset = random_nd_dataset
+    region = region_tests[0]
+    sda = dataset.pyfesom2.select(region=region)
+    assert isinstance(sda, xr.Dataset)
+    # test accessor to select points
+    npoints = 10
+    lons = np.linspace(-180, 180, npoints)
+    lats = np.linspace(-90, 90, npoints)
+    sda = dataset.pyfesom2.select(lon=lons, lat=lats)
+    assert isinstance(sda, xr.Dataset)
+
+    sda = dataset.pyfesom2.select_points(lon=lons, lat=lats)
+    assert isinstance(sda, xr.Dataset)
+
+
+def test_dataarray_accessor_methods(dataset):
+    from shapely.geometry import LineString
+    for data_var in dataset.data_vars.keys():
+        assert hasattr(dataset.pyfesom2, data_var)
+
+        data_var = list(dataset.data_vars.keys())[0]
+
+        # TODO: do this better, not extensive
+        # test select region
+        for region in region_tests:
+            assert getattr(dataset.pyfesom2, data_var).select(region=region) is not None
+
+        # test select points
+        npoints = 10
+        lons = np.linspace(-180, 180, npoints)
+        lats = np.linspace(-90, 90, npoints)
+
+        # TODO: do this better, assert the returned objects correspond to that of selection methods
+        assert getattr(dataset.pyfesom2, data_var).select(
+            path=(lons, lats)) is not None, 'select cannot take lon,lat as sequence'
+        shapely_path = LineString(np.column_stack([lons, lats]))
+        assert getattr(dataset.pyfesom2, data_var).select(
+            path=shapely_path) is not None, 'select cannot take lon,lat as LineString'
+        dict_path = {'lon': lons, 'lat': lats}
+        assert getattr(dataset.pyfesom2, data_var).select(
+            path=dict_path) is not None, 'select cannot take lon,lat as dictionary'
+        assert getattr(dataset.pyfesom2, data_var).select_points(lon=lons, lat=lats) is not None