From 3b04fb0158de93fe600cfe0ac8f35de9cb75829e Mon Sep 17 00:00:00 2001 From: Chun-Wei Yuan Date: Tue, 25 Oct 2016 21:18:09 -0700 Subject: [PATCH 1/9] Added join key to OPTIONS, used in dataarray & dataset binary ops, with a test module in test_dataarray.py --- xarray/core/dataarray.py | 7 +++++-- xarray/core/dataset.py | 7 +++++-- xarray/core/options.py | 3 ++- xarray/test/test_dataarray.py | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 5 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 71164072cc9..108776a6361 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -23,6 +23,7 @@ assert_unique_multiindex_level_names) from .formatting import format_item from .utils import decode_numpy_dict_values, ensure_us_time_resolution +from .options import OPTIONS def _infer_coords_and_dims(shape, coords, dims): @@ -1377,13 +1378,15 @@ def func(self, *args, **kwargs): return func @staticmethod - def _binary_op(f, reflexive=False, join='inner', **ignored_kwargs): + def _binary_op(f, reflexive=False, join=None, **ignored_kwargs): @functools.wraps(f) def func(self, other): if isinstance(other, (Dataset, groupby.GroupBy)): return NotImplemented if hasattr(other, 'indexes'): - self, other = align(self, other, join=join, copy=False) + # if user does not specify join, default to OPTIONS['join'] + how_to_join = join if join is not None else OPTIONS['join'] + self, other = align(self, other, join=how_to_join, copy=False) other_variable = getattr(other, 'variable', other) other_coords = getattr(other, 'coords', None) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 90d67e8e3cf..27ba7b28e7e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -25,6 +25,7 @@ from .pycompat import (iteritems, basestring, OrderedDict, dask_array_type) from .combine import concat +from .options import OPTIONS # list of attributes of pd.DatetimeIndex that are ndarrays of time info @@ -2018,13 +2019,15 @@ def func(self, *args, **kwargs): return func @staticmethod - def _binary_op(f, reflexive=False, join='inner', fillna=False): + def _binary_op(f, reflexive=False, join=None, fillna=False): @functools.wraps(f) def func(self, other): if isinstance(other, groupby.GroupBy): return NotImplemented if hasattr(other, 'indexes'): - self, other = align(self, other, join=join, copy=False) + # if user does not specify join, default to OPTIONS['join'] + how_to_join = join if join is not None else OPTIONS['join'] + self, other = align(self, other, join=how_to_join, copy=False) g = f if not reflexive else lambda x, y: f(y, x) ds = self._calculate_binary_op(g, other, fillna=fillna) return ds diff --git a/xarray/core/options.py b/xarray/core/options.py index 0594a1ce36d..bb55c6e0ee5 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -1,4 +1,5 @@ -OPTIONS = {'display_width': 80} +OPTIONS = {'display_width': 80, + 'join': "inner"} class set_options(object): diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index 0c2dfd3c4c7..c4d775f6c3d 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -2268,3 +2268,38 @@ def test_dot(self): da.dot(dm.values) with self.assertRaisesRegexp(ValueError, 'no shared dimensions'): da.dot(DataArray(1)) + + def test_binary_op_join_setting(self): + """ + A test method to verify the ability to set binary operation join kwarg + ("inner", "outer", "left", "right") via xr.set_options(). + """ + # First we set up a data array + xdim, ydim, zdim = 'x', 'y', 'z' + xcoords, ycoords, zcoords = ['a', 'b', 'c'], [-2, 0, 2], [0, 1, 2] + total_size = len(xcoords) * len(ycoords) * len(zcoords) + # create a 3-by-3-by-3 data array + arr = xr.DataArray(np.arange(total_size).\ + reshape(len(xcoords),len(ycoords),len(zcoords)), + [(xdim, xcoords), (ydim, ycoords),(zdim, zcoords)]) + # now create a data array with the last x slice missing + arr1 = arr[0:-1,:,:].copy() + # create another data array with the last z slice missing + arr2 = arr[:,:,0:-1].copy() + # because the default in OPTIONS is join="inner", we test "outer" first + xr.set_options(join="outer") + result = arr1 + arr2 + self.assertTrue(result.size == total_size) # should be 3 * 3 * 3 + self.assertTrue(result.shape == arr.shape) + self.assertTrue(result[-1,:,:].isnull().all()) + self.assertTrue(result[:,:,-1].isnull().all()) + # now revert back to join="inner" + xr.set_options(join="inner") + result = arr1 + arr2 + self.assertTrue(result.size == \ + (len(xcoords)-1)*len(ycoords)*(len(zcoords)-1)) + self.assertTrue(result.shape == \ + (len(xcoords)-1, len(ycoords), len(zcoords)-1)) + self.assertTrue(result.notnull().all()) + self.assertTrue('c' not in list(result['x'])) + self.assertTrue(2 not in list(result['z'])) From 643fa49d3f49f3cfd3ddd3e1e0ee164ae90a96e2 Mon Sep 17 00:00:00 2001 From: Chun-Wei Yuan Date: Wed, 26 Oct 2016 13:51:32 -0700 Subject: [PATCH 2/9] Added binary_ops test to test_dataset. --- xarray/test/test_dataset.py | 42 +++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py index 41c5953e063..3fff013ea6f 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -12,6 +12,7 @@ import numpy as np import pandas as pd +import xarray as xr import pytest from xarray import (align, broadcast, concat, merge, conventions, backends, @@ -2916,6 +2917,47 @@ def test_filter_by_attrs(self): for var in new_ds.data_vars: self.assertEqual(new_ds[var].height, '10 m') + def test_binary_op_join_setting(self): + """ + A test method to verify the ability to set binary operation join kwarg + ("inner", "outer", "left", "right") via xr.set_options(). + """ + # First we set up a data array + xdim, ydim, zdim = 'x', 'y', 'z' + xcoords, ycoords, zcoords = ['a', 'b', 'c'], [-2, 0, 2], [0, 1, 2] + total_size = len(xcoords) * len(ycoords) * len(zcoords) + # create a 3-by-3-by-3 data array + arr = DataArray(np.arange(total_size).\ + reshape(len(xcoords),len(ycoords),len(zcoords)), + [(xdim, xcoords), (ydim, ycoords),(zdim, zcoords)]) + # now create a data array with the last x slice missing + arr1 = arr[0:-1,:,:].copy() + ds1 = arr1.to_dataset(name='foo') + # create another data array with the last z slice missing + arr2 = arr[:,:,0:-1].copy() + ds2 = arr2.to_dataset(name='foo') # needs to be name='foo' as well + # because the default in OPTIONS is join="inner", we test "outer" first + xr.set_options(join="outer") + result = ds1 + ds2 + self.assertTrue(result.foo.size == total_size) # should be 3 * 3 * 3 + self.assertTrue(result.foo.shape == arr.shape) + self.assertTrue(result.foo[-1,:,:].isnull().all()) + self.assertTrue(result.foo[:,:,-1].isnull().all()) + # now revert back to join="inner" + xr.set_options(join="inner") + result = ds1 + ds2 + self.assertTrue(result.foo.size == \ + (len(xcoords)-1)*len(ycoords)*(len(zcoords)-1)) + self.assertTrue(result.foo.shape == \ + (len(xcoords)-1, len(ycoords), len(zcoords)-1)) + self.assertTrue(result.foo.notnull().all()) + self.assertTrue('c' not in list(result.foo['x'])) + self.assertTrue(2 not in list(result.foo['z'])) + # just for kicks, what happens when the dataarrays have different names? + ds3 = arr1.to_dataset(name='bar') + result = ds1 + ds3 + self.assertTrue(len(result.data_vars)==0) # empty dataset + ### Py.test tests From 43df4d0c8318c489627c14ffd85caa7c821a00ec Mon Sep 17 00:00:00 2001 From: Chun-Wei Yuan Date: Thu, 27 Oct 2016 13:53:14 -0700 Subject: [PATCH 3/9] Changed variable names according to review comments. --- xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 2 +- xarray/test/test_dataarray.py | 15 ++++++--------- xarray/test/test_dataset.py | 19 ++++++++----------- 4 files changed, 16 insertions(+), 22 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 108776a6361..c33f7fea381 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1385,7 +1385,7 @@ def func(self, other): return NotImplemented if hasattr(other, 'indexes'): # if user does not specify join, default to OPTIONS['join'] - how_to_join = join if join is not None else OPTIONS['join'] + how_to_join = join or OPTIONS['join'] self, other = align(self, other, join=how_to_join, copy=False) other_variable = getattr(other, 'variable', other) other_coords = getattr(other, 'coords', None) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 27ba7b28e7e..1515daa9c9b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2026,7 +2026,7 @@ def func(self, other): return NotImplemented if hasattr(other, 'indexes'): # if user does not specify join, default to OPTIONS['join'] - how_to_join = join if join is not None else OPTIONS['join'] + how_to_join = join or OPTIONS['join'] self, other = align(self, other, join=how_to_join, copy=False) g = f if not reflexive else lambda x, y: f(y, x) ds = self._calculate_binary_op(g, other, fillna=fillna) diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index c4d775f6c3d..c90d4114a19 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -2283,23 +2283,20 @@ def test_binary_op_join_setting(self): reshape(len(xcoords),len(ycoords),len(zcoords)), [(xdim, xcoords), (ydim, ycoords),(zdim, zcoords)]) # now create a data array with the last x slice missing - arr1 = arr[0:-1,:,:].copy() + missing_last_x = arr[0:-1,:,:].copy() # create another data array with the last z slice missing - arr2 = arr[:,:,0:-1].copy() + missing_last_z = arr[:,:,0:-1].copy() # because the default in OPTIONS is join="inner", we test "outer" first xr.set_options(join="outer") - result = arr1 + arr2 - self.assertTrue(result.size == total_size) # should be 3 * 3 * 3 + result = missing_last_x + missing_last_z self.assertTrue(result.shape == arr.shape) self.assertTrue(result[-1,:,:].isnull().all()) self.assertTrue(result[:,:,-1].isnull().all()) # now revert back to join="inner" xr.set_options(join="inner") - result = arr1 + arr2 - self.assertTrue(result.size == \ - (len(xcoords)-1)*len(ycoords)*(len(zcoords)-1)) + result = missing_last_x + missing_last_z self.assertTrue(result.shape == \ (len(xcoords)-1, len(ycoords), len(zcoords)-1)) self.assertTrue(result.notnull().all()) - self.assertTrue('c' not in list(result['x'])) - self.assertTrue(2 not in list(result['z'])) + self.assertFalse('c' in list(result['x'])) + self.assertFalse(2 in list(result['z'])) diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py index 3fff013ea6f..98d9d2335ae 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -2932,30 +2932,27 @@ def test_binary_op_join_setting(self): [(xdim, xcoords), (ydim, ycoords),(zdim, zcoords)]) # now create a data array with the last x slice missing arr1 = arr[0:-1,:,:].copy() - ds1 = arr1.to_dataset(name='foo') + missing_last_x = arr1.to_dataset(name='foo') # create another data array with the last z slice missing arr2 = arr[:,:,0:-1].copy() - ds2 = arr2.to_dataset(name='foo') # needs to be name='foo' as well + missing_last_z = arr2.to_dataset(name='foo') # needs to be name='foo' as well # because the default in OPTIONS is join="inner", we test "outer" first xr.set_options(join="outer") - result = ds1 + ds2 - self.assertTrue(result.foo.size == total_size) # should be 3 * 3 * 3 + result = missing_last_x + missing_last_z self.assertTrue(result.foo.shape == arr.shape) self.assertTrue(result.foo[-1,:,:].isnull().all()) self.assertTrue(result.foo[:,:,-1].isnull().all()) # now revert back to join="inner" xr.set_options(join="inner") - result = ds1 + ds2 - self.assertTrue(result.foo.size == \ - (len(xcoords)-1)*len(ycoords)*(len(zcoords)-1)) + result = missing_last_x + missing_last_z self.assertTrue(result.foo.shape == \ (len(xcoords)-1, len(ycoords), len(zcoords)-1)) self.assertTrue(result.foo.notnull().all()) - self.assertTrue('c' not in list(result.foo['x'])) - self.assertTrue(2 not in list(result.foo['z'])) + self.assertFalse('c' in list(result.foo['x'])) + self.assertFalse(2 in list(result.foo['z'])) # just for kicks, what happens when the dataarrays have different names? - ds3 = arr1.to_dataset(name='bar') - result = ds1 + ds3 + misnomer = arr1.to_dataset(name='bar') + result = missing_last_x + misnomer self.assertTrue(len(result.data_vars)==0) # empty dataset From ebb2ad06de753b118389416a51ed80c9306045b9 Mon Sep 17 00:00:00 2001 From: Chun-Wei Yuan Date: Fri, 28 Oct 2016 14:32:55 -0700 Subject: [PATCH 4/9] Changed default key to arithmetic_join, and shortened tests. Also added to computation.rst and whats-new.rst --- doc/computation.rst | 21 ++++++++++++++-- doc/whats-new.rst | 5 ++++ xarray/core/dataarray.py | 5 ++-- xarray/core/dataset.py | 5 ++-- xarray/core/options.py | 2 +- xarray/test/test_dataarray.py | 41 ++++++++---------------------- xarray/test/test_dataset.py | 47 ++++++++--------------------------- 7 files changed, 51 insertions(+), 75 deletions(-) diff --git a/doc/computation.rst b/doc/computation.rst index 7ba66f6db8b..f184bea9227 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -210,8 +210,8 @@ coordinates with the same name as a dimension, marked by ``*``) on objects used in binary operations. Similarly to pandas, this alignment is automatic for arithmetic on binary -operations. Note that unlike pandas, this the result of a binary operation is -by the *intersection* (not the union) of coordinate labels: +operations. The default result of a binary operation is by the *intersection* +(not the union) of coordinate labels: .. ipython:: python @@ -225,6 +225,23 @@ If the result would be empty, an error is raised instead: In [1]: arr[:2] + arr[2:] ValueError: no overlapping labels for some dimensions: ['x'] +However, one can explicitly change this default automatic alignment type ("inner") +via :py:func:`~xarray.set_options()` + +.. ipython:: python + + xr.set_options(arithmetic_join="outer") + arr + arr[:1] + +Note that this changes the alignment type for all ensuing binary operations. One +could also use :py:func:`~xarray.set_options()` in a context manager + +.. ipython:: python + + with xr.set_options(arithmetic_join="outer"): + arr + arr[:1] + arr + arr[:1] + Before loops or performance critical code, it's a good idea to align arrays explicitly (e.g., by putting them in the same Dataset or using :py:func:`~xarray.align`) to avoid the overhead of repeated alignment with each diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 58e6c66840b..53466af2418 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -44,6 +44,11 @@ Deprecations Enhancements ~~~~~~~~~~~~ +- Added the ability to change default automatic alignment (arithmetic_join="inner") + for binary operations via :py:func:`~xarray.set_options()` + (see :ref:`automatic alignment`). + By `Chun-Wei Yuan `_. + - Add checking of ``attr`` names and values when saving to netCDF, raising useful error messages if they are invalid. (:issue:`911`). By `Robin Wilson `_. diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index c33f7fea381..c879f869f1b 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1384,9 +1384,8 @@ def func(self, other): if isinstance(other, (Dataset, groupby.GroupBy)): return NotImplemented if hasattr(other, 'indexes'): - # if user does not specify join, default to OPTIONS['join'] - how_to_join = join or OPTIONS['join'] - self, other = align(self, other, join=how_to_join, copy=False) + align_type = OPTIONS['arithmetic_join'] if join is None else join + self, other = align(self, other, join=align_type, copy=False) other_variable = getattr(other, 'variable', other) other_coords = getattr(other, 'coords', None) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1515daa9c9b..c8fad7e3bcb 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2025,9 +2025,8 @@ def func(self, other): if isinstance(other, groupby.GroupBy): return NotImplemented if hasattr(other, 'indexes'): - # if user does not specify join, default to OPTIONS['join'] - how_to_join = join or OPTIONS['join'] - self, other = align(self, other, join=how_to_join, copy=False) + align_type = OPTIONS['arithmetic_join'] if join is None else join + self, other = align(self, other, join=align_type, copy=False) g = f if not reflexive else lambda x, y: f(y, x) ds = self._calculate_binary_op(g, other, fillna=fillna) return ds diff --git a/xarray/core/options.py b/xarray/core/options.py index bb55c6e0ee5..0c5a94e4fb0 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -1,5 +1,5 @@ OPTIONS = {'display_width': 80, - 'join': "inner"} + 'arithmetic_join': "inner"} class set_options(object): diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index c90d4114a19..88fde9bf282 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -2270,33 +2270,14 @@ def test_dot(self): da.dot(DataArray(1)) def test_binary_op_join_setting(self): - """ - A test method to verify the ability to set binary operation join kwarg - ("inner", "outer", "left", "right") via xr.set_options(). - """ - # First we set up a data array - xdim, ydim, zdim = 'x', 'y', 'z' - xcoords, ycoords, zcoords = ['a', 'b', 'c'], [-2, 0, 2], [0, 1, 2] - total_size = len(xcoords) * len(ycoords) * len(zcoords) - # create a 3-by-3-by-3 data array - arr = xr.DataArray(np.arange(total_size).\ - reshape(len(xcoords),len(ycoords),len(zcoords)), - [(xdim, xcoords), (ydim, ycoords),(zdim, zcoords)]) - # now create a data array with the last x slice missing - missing_last_x = arr[0:-1,:,:].copy() - # create another data array with the last z slice missing - missing_last_z = arr[:,:,0:-1].copy() - # because the default in OPTIONS is join="inner", we test "outer" first - xr.set_options(join="outer") - result = missing_last_x + missing_last_z - self.assertTrue(result.shape == arr.shape) - self.assertTrue(result[-1,:,:].isnull().all()) - self.assertTrue(result[:,:,-1].isnull().all()) - # now revert back to join="inner" - xr.set_options(join="inner") - result = missing_last_x + missing_last_z - self.assertTrue(result.shape == \ - (len(xcoords)-1, len(ycoords), len(zcoords)-1)) - self.assertTrue(result.notnull().all()) - self.assertFalse('c' in list(result['x'])) - self.assertFalse(2 in list(result['z'])) + dim = 'x' + align_type = "outer" + coords_l, coords_r = [0, 1, 2], [1, 2, 3] + missing_0 = xr.DataArray(coords_l, [(dim, coords_l)]) + missing_3 = xr.DataArray(coords_r, [(dim, coords_r)]) + with xr.set_options(arithmetic_join=align_type): + experimental = missing_0 + missing_3 + missing_0_aligned, missing_3_aligned =\ + xr.align(missing_0, missing_3, join=align_type) + control = missing_0_aligned + missing_3_aligned + self.assertDataArrayEqual(experimental, control) diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py index 98d9d2335ae..caaefc5eb9d 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -2918,42 +2918,17 @@ def test_filter_by_attrs(self): self.assertEqual(new_ds[var].height, '10 m') def test_binary_op_join_setting(self): - """ - A test method to verify the ability to set binary operation join kwarg - ("inner", "outer", "left", "right") via xr.set_options(). - """ - # First we set up a data array - xdim, ydim, zdim = 'x', 'y', 'z' - xcoords, ycoords, zcoords = ['a', 'b', 'c'], [-2, 0, 2], [0, 1, 2] - total_size = len(xcoords) * len(ycoords) * len(zcoords) - # create a 3-by-3-by-3 data array - arr = DataArray(np.arange(total_size).\ - reshape(len(xcoords),len(ycoords),len(zcoords)), - [(xdim, xcoords), (ydim, ycoords),(zdim, zcoords)]) - # now create a data array with the last x slice missing - arr1 = arr[0:-1,:,:].copy() - missing_last_x = arr1.to_dataset(name='foo') - # create another data array with the last z slice missing - arr2 = arr[:,:,0:-1].copy() - missing_last_z = arr2.to_dataset(name='foo') # needs to be name='foo' as well - # because the default in OPTIONS is join="inner", we test "outer" first - xr.set_options(join="outer") - result = missing_last_x + missing_last_z - self.assertTrue(result.foo.shape == arr.shape) - self.assertTrue(result.foo[-1,:,:].isnull().all()) - self.assertTrue(result.foo[:,:,-1].isnull().all()) - # now revert back to join="inner" - xr.set_options(join="inner") - result = missing_last_x + missing_last_z - self.assertTrue(result.foo.shape == \ - (len(xcoords)-1, len(ycoords), len(zcoords)-1)) - self.assertTrue(result.foo.notnull().all()) - self.assertFalse('c' in list(result.foo['x'])) - self.assertFalse(2 in list(result.foo['z'])) - # just for kicks, what happens when the dataarrays have different names? - misnomer = arr1.to_dataset(name='bar') - result = missing_last_x + misnomer - self.assertTrue(len(result.data_vars)==0) # empty dataset + dim = 'x' + align_type = "outer" + coords_l, coords_r = [0, 1, 2], [1, 2, 3] + missing_0 = xr.DataArray(coords_l, [(dim, coords_l)]).to_dataset(name='a') + missing_3 = xr.DataArray(coords_r, [(dim, coords_r)]).to_dataset(name='a') + with xr.set_options(arithmetic_join=align_type): + experimental = missing_0 + missing_3 + missing_0_aligned, missing_3_aligned =\ + xr.align(missing_0, missing_3, join=align_type) + control = missing_0_aligned + missing_3_aligned + self.assertDatasetEqual(experimental, control) ### Py.test tests From ab717fac0f2a29436b26ace4a918b57fc410a1a1 Mon Sep 17 00:00:00 2001 From: Chun-Wei Yuan Date: Fri, 28 Oct 2016 17:30:03 -0700 Subject: [PATCH 5/9] Emphasis on context manager for xr.set_options() use. --- doc/computation.rst | 10 +--------- xarray/test/test_dataarray.py | 12 ++++++------ xarray/test/test_dataset.py | 12 ++++++------ 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/doc/computation.rst b/doc/computation.rst index f184bea9227..9cba5db2061 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -226,15 +226,7 @@ If the result would be empty, an error is raised instead: ValueError: no overlapping labels for some dimensions: ['x'] However, one can explicitly change this default automatic alignment type ("inner") -via :py:func:`~xarray.set_options()` - -.. ipython:: python - - xr.set_options(arithmetic_join="outer") - arr + arr[:1] - -Note that this changes the alignment type for all ensuing binary operations. One -could also use :py:func:`~xarray.set_options()` in a context manager +via :py:func:`~xarray.set_options()` in context manager: .. ipython:: python diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index 88fde9bf282..8e8e99be5dd 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -2273,11 +2273,11 @@ def test_binary_op_join_setting(self): dim = 'x' align_type = "outer" coords_l, coords_r = [0, 1, 2], [1, 2, 3] - missing_0 = xr.DataArray(coords_l, [(dim, coords_l)]) - missing_3 = xr.DataArray(coords_r, [(dim, coords_r)]) + missing_3 = xr.DataArray(coords_l, [(dim, coords_l)]) + missing_0 = xr.DataArray(coords_r, [(dim, coords_r)]) with xr.set_options(arithmetic_join=align_type): experimental = missing_0 + missing_3 - missing_0_aligned, missing_3_aligned =\ - xr.align(missing_0, missing_3, join=align_type) - control = missing_0_aligned + missing_3_aligned - self.assertDataArrayEqual(experimental, control) + missing_0_aligned, missing_3_aligned =\ + xr.align(missing_0, missing_3, join=align_type) + control = xr.DataArray([np.nan, 2, 4, np.nan], [(dim, [0, 1, 2, 3])]) #missing_0_aligned + missing_3_aligned + self.assertDataArrayEqual(experimental, control) diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py index caaefc5eb9d..58db8ec8812 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -2921,14 +2921,14 @@ def test_binary_op_join_setting(self): dim = 'x' align_type = "outer" coords_l, coords_r = [0, 1, 2], [1, 2, 3] - missing_0 = xr.DataArray(coords_l, [(dim, coords_l)]).to_dataset(name='a') - missing_3 = xr.DataArray(coords_r, [(dim, coords_r)]).to_dataset(name='a') + missing_3 = xr.DataArray(coords_l, [(dim, coords_l)]).to_dataset(name='a') + missing_0 = xr.DataArray(coords_r, [(dim, coords_r)]).to_dataset(name='a') with xr.set_options(arithmetic_join=align_type): experimental = missing_0 + missing_3 - missing_0_aligned, missing_3_aligned =\ - xr.align(missing_0, missing_3, join=align_type) - control = missing_0_aligned + missing_3_aligned - self.assertDatasetEqual(experimental, control) + missing_0_aligned, missing_3_aligned =\ + xr.align(missing_0, missing_3, join=align_type) + control = missing_0_aligned + missing_3_aligned + self.assertDatasetEqual(experimental, control) ### Py.test tests From 8aa969b002ba890f220c20516f0437e2dea9237d Mon Sep 17 00:00:00 2001 From: Chun-Wei Yuan Date: Tue, 1 Nov 2016 10:39:53 -0700 Subject: [PATCH 6/9] Changed to actual vs expected testing nomenclature. --- xarray/test/test_dataarray.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index 8e8e99be5dd..73e621ee337 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -2276,8 +2276,8 @@ def test_binary_op_join_setting(self): missing_3 = xr.DataArray(coords_l, [(dim, coords_l)]) missing_0 = xr.DataArray(coords_r, [(dim, coords_r)]) with xr.set_options(arithmetic_join=align_type): - experimental = missing_0 + missing_3 + actual = missing_0 + missing_3 missing_0_aligned, missing_3_aligned =\ xr.align(missing_0, missing_3, join=align_type) - control = xr.DataArray([np.nan, 2, 4, np.nan], [(dim, [0, 1, 2, 3])]) #missing_0_aligned + missing_3_aligned - self.assertDataArrayEqual(experimental, control) + expected = xr.DataArray([np.nan, 2, 4, np.nan], [(dim, [0, 1, 2, 3])]) + self.assertDataArrayEqual(actual, expected) From 46ad36a10f727731874f1a3efc6b5d0cadd03f08 Mon Sep 17 00:00:00 2001 From: Chun-Wei Yuan Date: Fri, 11 Nov 2016 14:22:56 -0800 Subject: [PATCH 7/9] Applies join options to Dataset.data_vars as well. --- xarray/core/dataset.py | 33 ++++++++++++++++------------ xarray/core/options.py | 4 ++++ xarray/test/test_dataset.py | 43 +++++++++++++++++++++++++++---------- 3 files changed, 55 insertions(+), 25 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c8fad7e3bcb..45e3a79856a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2024,11 +2024,12 @@ def _binary_op(f, reflexive=False, join=None, fillna=False): def func(self, other): if isinstance(other, groupby.GroupBy): return NotImplemented + align_type = OPTIONS['arithmetic_join'] if join is None else join if hasattr(other, 'indexes'): - align_type = OPTIONS['arithmetic_join'] if join is None else join self, other = align(self, other, join=align_type, copy=False) g = f if not reflexive else lambda x, y: f(y, x) - ds = self._calculate_binary_op(g, other, fillna=fillna) + ds = self._calculate_binary_op(g, other, join=align_type, + fillna=fillna) return ds return func @@ -2050,25 +2051,30 @@ def func(self, other): return self return func - def _calculate_binary_op(self, f, other, inplace=False, fillna=False): + def _calculate_binary_op(self, f, other, join='inner', + inplace=False, fillna=False): def apply_over_both(lhs_data_vars, rhs_data_vars, lhs_vars, rhs_vars): + if fillna and join != 'left': + raise ValueError('`fillna` must be accompanied by left join') if fillna and not set(rhs_data_vars) <= set(lhs_data_vars): raise ValueError('all variables in the argument to `fillna` ' 'must be contained in the original dataset') + if inplace and set(lhs_data_vars) != set(rhs_data_vars): + raise ValueError('datasets must have the same data variables ' + 'for in-place arithmetic operations: %s, %s' + % (list(lhs_data_vars), list(rhs_data_vars))) dest_vars = OrderedDict() - for k in lhs_data_vars: - if k in rhs_data_vars: - dest_vars[k] = f(lhs_vars[k], rhs_vars[k]) - elif inplace: - raise ValueError( - 'datasets must have the same data variables ' - 'for in-place arithmetic operations: %s, %s' - % (list(lhs_data_vars), list(rhs_data_vars))) - elif fillna: - # this shortcuts left alignment of variables for fillna + + for k in set(lhs_data_vars) & set(rhs_data_vars): + dest_vars[k] = f(lhs_vars[k], rhs_vars[k]) + if join in ["outer", "left"]: + for k in set(lhs_data_vars) - set(rhs_data_vars): dest_vars[k] = lhs_vars[k] + if join in ["outer", "right"]: + for k in set(rhs_data_vars) - set(lhs_data_vars): + dest_vars[k] = rhs_vars[k] return dest_vars if utils.is_dict_like(other) and not isinstance(other, Dataset): @@ -2088,7 +2094,6 @@ def apply_over_both(lhs_data_vars, rhs_data_vars, lhs_vars, rhs_vars): other_variable = getattr(other, 'variable', other) new_vars = OrderedDict((k, f(self.variables[k], other_variable)) for k in self.data_vars) - ds._variables.update(new_vars) return ds diff --git a/xarray/core/options.py b/xarray/core/options.py index 0c5a94e4fb0..763b101a6cf 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -26,6 +26,10 @@ class set_options(object): """ def __init__(self, **kwargs): self.old = OPTIONS.copy() + for key in kwargs: + if key not in OPTIONS: + raise KeyError("acceptable keys are: {}".\ + format(', '.join(OPTIONS.keys()))) OPTIONS.update(kwargs) def __enter__(self): diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py index 58db8ec8812..db32a780af6 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -2918,17 +2918,38 @@ def test_filter_by_attrs(self): self.assertEqual(new_ds[var].height, '10 m') def test_binary_op_join_setting(self): - dim = 'x' - align_type = "outer" - coords_l, coords_r = [0, 1, 2], [1, 2, 3] - missing_3 = xr.DataArray(coords_l, [(dim, coords_l)]).to_dataset(name='a') - missing_0 = xr.DataArray(coords_r, [(dim, coords_r)]).to_dataset(name='a') - with xr.set_options(arithmetic_join=align_type): - experimental = missing_0 + missing_3 - missing_0_aligned, missing_3_aligned =\ - xr.align(missing_0, missing_3, join=align_type) - control = missing_0_aligned + missing_3_aligned - self.assertDatasetEqual(experimental, control) + # arithmetic_join applies to data array coordinates + missing_2 = xr.Dataset({'x':[0, 1]}) + missing_0 = xr.Dataset({'x':[1, 2]}) + with xr.set_options(arithmetic_join='outer'): + actual = missing_2 + missing_0 + expected = xr.Dataset({'x':[0, 1, 2]}) + self.assertDatasetEqual(actual, expected) + + # arithmetic join also applies to data_vars + ds1 = xr.Dataset({'foo': 1, 'bar': 2}) + ds2 = xr.Dataset({'bar': 2, 'baz': 3}) + expected = xr.Dataset({'bar': 4}) # default is inner joining + actual = ds1 + ds2 + self.assertDatasetEqual(actual, expected) + + with xr.set_options(arithmetic_join='outer'): + expected = xr.Dataset({'foo':1, 'bar': 4, 'baz': 3}) + actual = ds1 + ds2 + self.assertDatasetEqual(actual, expected) + + with xr.set_options(arithmetic_join='left'): + expected = xr.Dataset({'foo':1, 'bar': 4}) + actual = ds1 + ds2 + self.assertDatasetEqual(actual, expected) + + with xr.set_options(arithmetic_join='right'): + expected = xr.Dataset({'baz':3, 'bar': 4}) + actual = ds1 + ds2 + self.assertDatasetEqual(actual, expected) + + + ### Py.test tests From a7b7497396972e3c9d14d8452e34833228738f20 Mon Sep 17 00:00:00 2001 From: Chun-Wei Yuan Date: Fri, 11 Nov 2016 19:19:08 -0800 Subject: [PATCH 8/9] Preserve order of joined data_vars, left-to-right. Use np.nan as default filler. --- xarray/core/dataset.py | 18 ++++++++++-------- xarray/core/options.py | 4 ---- xarray/test/test_dataset.py | 8 ++++---- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1750aac6a9e..c1418ab8455 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2061,14 +2061,16 @@ def apply_over_both(lhs_data_vars, rhs_data_vars, lhs_vars, rhs_vars): dest_vars = OrderedDict() - for k in set(lhs_data_vars) & set(rhs_data_vars): - dest_vars[k] = f(lhs_vars[k], rhs_vars[k]) - if join in ["outer", "left"]: - for k in set(lhs_data_vars) - set(rhs_data_vars): - dest_vars[k] = lhs_vars[k] - if join in ["outer", "right"]: - for k in set(rhs_data_vars) - set(lhs_data_vars): - dest_vars[k] = rhs_vars[k] + for k in lhs_data_vars: + if k in rhs_data_vars: + dest_vars[k] = f(lhs_vars[k], rhs_vars[k]) + elif join in ["left", "outer"]: + dest_vars[k] = lhs_vars[k] if fillna else\ + f(lhs_vars[k], np.nan) + for k in rhs_data_vars: + if k not in dest_vars and join in ["right", "outer"]: + dest_vars[k] = rhs_vars[k] if fillna else\ + f(rhs_vars[k], np.nan) return dest_vars if utils.is_dict_like(other) and not isinstance(other, Dataset): diff --git a/xarray/core/options.py b/xarray/core/options.py index 9fc7cef8dfb..c6d9be059ef 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -33,10 +33,6 @@ def __init__(self, **kwargs): raise ValueError('argument names %r are not in the set of valid ' 'options %r' % (invalid_options, set(OPTIONS))) self.old = OPTIONS.copy() - for key in kwargs: - if key not in OPTIONS: - raise KeyError("acceptable keys are: {}".\ - format(', '.join(OPTIONS.keys()))) OPTIONS.update(kwargs) def __enter__(self): diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py index 7bde1cdeafb..4d3bccabd3b 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -2951,19 +2951,19 @@ def test_binary_op_join_setting(self): expected = xr.Dataset({'bar': 4}) # default is inner joining actual = ds1 + ds2 self.assertDatasetEqual(actual, expected) - + with xr.set_options(arithmetic_join='outer'): - expected = xr.Dataset({'foo':1, 'bar': 4, 'baz': 3}) + expected = xr.Dataset({'foo': np.nan, 'bar': 4, 'baz': np.nan}) actual = ds1 + ds2 self.assertDatasetEqual(actual, expected) with xr.set_options(arithmetic_join='left'): - expected = xr.Dataset({'foo':1, 'bar': 4}) + expected = xr.Dataset({'foo': np.nan, 'bar': 4}) actual = ds1 + ds2 self.assertDatasetEqual(actual, expected) with xr.set_options(arithmetic_join='right'): - expected = xr.Dataset({'baz':3, 'bar': 4}) + expected = xr.Dataset({'bar': 4, 'baz': np.nan}) actual = ds1 + ds2 self.assertDatasetEqual(actual, expected) From 88c37bcf9928e4ff4053327b134233ec25183d50 Mon Sep 17 00:00:00 2001 From: Chun-Wei Yuan Date: Fri, 11 Nov 2016 19:52:58 -0800 Subject: [PATCH 9/9] PEP8 and doctring. --- xarray/core/dataset.py | 8 ++++---- xarray/core/options.py | 7 +++++-- xarray/test/test_dataarray.py | 7 ++++--- xarray/test/test_dataset.py | 3 --- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c1418ab8455..964bd3dbb7a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2065,12 +2065,12 @@ def apply_over_both(lhs_data_vars, rhs_data_vars, lhs_vars, rhs_vars): if k in rhs_data_vars: dest_vars[k] = f(lhs_vars[k], rhs_vars[k]) elif join in ["left", "outer"]: - dest_vars[k] = lhs_vars[k] if fillna else\ - f(lhs_vars[k], np.nan) + dest_vars[k] = (lhs_vars[k] if fillna else + f(lhs_vars[k], np.nan)) for k in rhs_data_vars: if k not in dest_vars and join in ["right", "outer"]: - dest_vars[k] = rhs_vars[k] if fillna else\ - f(rhs_vars[k], np.nan) + dest_vars[k] = (rhs_vars[k] if fillna else + f(rhs_vars[k], np.nan)) return dest_vars if utils.is_dict_like(other) and not isinstance(other, Dataset): diff --git a/xarray/core/options.py b/xarray/core/options.py index c6d9be059ef..65264bf4919 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -8,8 +8,11 @@ class set_options(object): """Set options for xarray in a controlled context. - Currently, the only supported option is ``display_width``, which has a - default value of 80. + Currently, the only supported options are: + 1.) display_width: maximum terminal display width of data arrays. + Default=80. + 2.) arithmetic_join: dataarray/dataset alignment in binary operations. + Default='inner'. You can use ``set_options`` either as a context manager: diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index b90a07cc2e9..d5ee9851469 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -2287,7 +2287,8 @@ def test_binary_op_join_setting(self): missing_0 = xr.DataArray(coords_r, [(dim, coords_r)]) with xr.set_options(arithmetic_join=align_type): actual = missing_0 + missing_3 - missing_0_aligned, missing_3_aligned =\ - xr.align(missing_0, missing_3, join=align_type) - expected = xr.DataArray([np.nan, 2, 4, np.nan], [(dim, [0, 1, 2, 3])]) + missing_0_aligned, missing_3_aligned = xr.align(missing_0, + missing_3, + join=align_type) + expected = xr.DataArray([np.nan, 2, 4, np.nan], [(dim, [0, 1, 2, 3])]) self.assertDataArrayEqual(actual, expected) diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py index 4d3bccabd3b..5b8af6b6437 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -2968,9 +2968,6 @@ def test_binary_op_join_setting(self): self.assertDatasetEqual(actual, expected) - - - ### Py.test tests