diff --git a/doc/data-structures.rst b/doc/data-structures.rst index b274d84535a..f24358bffd8 100644 --- a/doc/data-structures.rst +++ b/doc/data-structures.rst @@ -48,7 +48,7 @@ The :py:class:`~xarray.DataArray` constructor takes: :py:class:`~pandas.Series`, :py:class:`~pandas.DataFrame` or :py:class:`~pandas.Panel`) - ``coords``: a list or dictionary of coordinates - ``dims``: a list of dimension names. If omitted, dimension names are - taken from ``coords`` if possible + taken from ``coords`` if possible. - ``attrs``: a dictionary of attributes to add to the instance - ``name``: a string that names the instance @@ -69,15 +69,19 @@ in with default values: As you can see, dimension names are always present in the xarray data model: if you do not provide them, defaults of the form ``dim_N`` will be created. +However, coordinates are optional. If you do not specify coordinates for a +dimension, the axis name will appear under the list of "Unindexed dimensions". .. note:: - Prior to xarray v0.9, coordinates corresponding to dimension were *also* - always present in xarray: xarray would create default coordinates of the form - ``range(dim_size)`` if coordinates were not supplied explicitly. This is no - longer the case. + This is different from pandas, where axes always have tick labels, which + default to the integers ``[0, ..., n-1]``. -Coordinates can take the following forms: + Prior to xarray v0.9, xarray copied this behavior: default coordinates for + each dimension would be created if coordinates were not supplied explicitly. + This is no longer the case. + +Coordinates can be specified in the following ways: - A list of values with length equal to the number of dimensions, providing coordinate labels for each dimension. Each value must be of one of the @@ -243,8 +247,8 @@ Creating a Dataset To make an :py:class:`~xarray.Dataset` from scratch, supply dictionaries for any variables (``data_vars``), coordinates (``coords``) and attributes (``attrs``). 
-- ``data_vars`` should be a dictionary with each key as the name of the variable and each -value as one of: +- ``data_vars`` should be a dictionary with each key as the name of the variable + and each value as one of: * A :py:class:`~xarray.DataArray` or :py:class:`~xarray.Variable` * A tuple of the form ``(dims, data[, attrs])``, which is converted into diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 79a629cce20..6827282fe2d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -31,7 +31,25 @@ Breaking changes ~~~~~~~~~~~~~~~~ - Index coordinates for each dimensions are now optional, and no longer created - by default :issue:`1017`. This has a number of implications: + by default :issue:`1017`. You can identify such dimensions without indexes by + their appearance in the list of "Unindexed dimensions" in the ``Dataset`` or + ``DataArray`` repr: + + .. ipython:: + :verbatim: + + In [1]: xr.Dataset({'foo': (('x', 'y'), [[1, 2]])}) + Out[1]: + + Dimensions: (x: 1, y: 2) + Coordinates: + *empty* + Unindexed dimensions: + x, y + Data variables: + foo (x, y) int64 1 2 + + This has a number of implications: - :py:func:`~align` and :py:meth:`~Dataset.reindex` can now error, if dimensions labels are missing and dimensions have different sizes. 
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d7a3631ba6c..2f5a443e63c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -276,7 +276,7 @@ def __getitem__(self, key): raise KeyError(key) def __unicode__(self): - return formatting.vars_repr(self) + return formatting.data_vars_repr(self) @property def variables(self): diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 8187434348e..c9c1b513fc4 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -211,13 +211,6 @@ def _summarize_var_or_coord(name, var, col_width, show_values=True, return front_str + values_str -def _summarize_dummy_var(name, col_width, marker=u'o', values=u'-'): - """Used if there is no coordinate for a dimension.""" - first_col = pretty_print(u' %s %s ' % (marker, name), col_width) - dims_str = u'(%s) ' % unicode_type(name) - return u'%s%s%s' % (first_col, dims_str, values) - - def _summarize_coord_multiindex(coord, col_width, marker): first_col = pretty_print(u' %s %s ' % (marker, coord.name), col_width) return u'%s(%s) MultiIndex' % (first_col, unicode_type(coord.dims[0])) @@ -248,8 +241,6 @@ def summarize_var(name, var, col_width): def summarize_coord(name, var, col_width): - if var is None: - return _summarize_dummy_var(name, col_width) is_index = name in var.dims show_values = is_index or _not_remote(var) marker = u'*' if is_index else u' ' @@ -305,8 +296,8 @@ def _mapping_repr(mapping, title, summarizer, col_width=None): return u'\n'.join(summary) -vars_repr = functools.partial(_mapping_repr, title=u'Data variables', - summarizer=summarize_var) +data_vars_repr = functools.partial(_mapping_repr, title=u'Data variables', + summarizer=summarize_var) attrs_repr = functools.partial(_mapping_repr, title=u'Attributes', @@ -316,12 +307,7 @@ def _mapping_repr(mapping, title, summarizer, col_width=None): def coords_repr(coords, col_width=None): if col_width is None: col_width = _calculate_col_width(_get_col_items(coords)) - # 
augment coordinates to include markers for missing coordinates - augmented_coords = OrderedDict(coords) - for dim in coords.dims: - if dim not in augmented_coords: - augmented_coords[dim] = None - return _mapping_repr(augmented_coords, title=u'Coordinates', + return _mapping_repr(coords, title=u'Coordinates', summarizer=summarize_coord, col_width=col_width) @@ -337,6 +323,15 @@ def dim_summary(obj): return u', '.join(elements) +def unindexed_dims_repr(dims, coords): + unindexed_dims = [d for d in dims if d not in coords] + if unindexed_dims: + dims_str = u', '.join(u'%s' % d for d in unindexed_dims) + return u'Unindexed dimensions:\n' + u' ' * 4 + dims_str + else: + return None + + @contextlib.contextmanager def set_numpy_options(*args, **kwargs): original = np.get_printoptions() @@ -386,6 +381,10 @@ def array_repr(arr): if arr.coords: summary.append(repr(arr.coords)) + unindexed_dims_str = unindexed_dims_repr(arr.dims, arr.coords) + if unindexed_dims_str: + summary.append(unindexed_dims_str) + if arr.attrs: summary.append(attrs_repr(arr.attrs)) @@ -401,7 +400,13 @@ def dataset_repr(ds): summary.append(u'%s(%s)' % (dims_start, dim_summary(ds))) summary.append(coords_repr(ds.coords, col_width=col_width)) - summary.append(vars_repr(ds.data_vars, col_width=col_width)) + + unindexed_dims_str = unindexed_dims_repr(ds.dims, ds.coords) + if unindexed_dims_str: + summary.append(unindexed_dims_str) + + summary.append(data_vars_repr(ds.data_vars, col_width=col_width)) + if ds.attrs: summary.append(attrs_repr(ds.attrs)) diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index e58fa131844..cd33e285455 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -46,7 +46,8 @@ def test_repr(self): Coordinates: * x (x) int64 0 1 2 other int64 0 - o time (time) - + Unindexed dimensions: + time Attributes: foo: bar""") self.assertEqual(expected, repr(data_array)) diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py 
index 54f6cb273a9..e7239c6a22e 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -91,7 +91,8 @@ def test_repr(self): * dim2 (dim2) float64 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 * dim3 (dim3) %s 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' numbers (dim3) int64 0 1 2 0 0 1 1 2 2 3 - o dim1 (dim1) - + Unindexed dimensions: + dim1 Data variables: var1 (dim1, dim2) float64 -1.086 0.9973 0.283 -1.506 -0.5786 1.651 ... var2 (dim1, dim2) float64 1.162 -1.097 -2.123 1.04 -0.4034 -0.126 ... @@ -203,25 +204,25 @@ def test_info(self): expected = dedent(u'''\ xarray.Dataset { dimensions: - dim1 = 8 ; - dim2 = 9 ; - dim3 = 10 ; - time = 20 ; + \tdim1 = 8 ; + \tdim2 = 9 ; + \tdim3 = 10 ; + \ttime = 20 ; variables: - datetime64[ns] time(time) ; - float64 dim2(dim2) ; - float64 var1(dim1, dim2) ; - var1:foo = variable ; - float64 var2(dim1, dim2) ; - var2:foo = variable ; - float64 var3(dim3, dim1) ; - var3:foo = variable ; - int64 numbers(dim3) ; + \tdatetime64[ns] time(time) ; + \tfloat64 dim2(dim2) ; + \tfloat64 var1(dim1, dim2) ; + \t\tvar1:foo = variable ; + \tfloat64 var2(dim1, dim2) ; + \t\tvar2:foo = variable ; + \tfloat64 var3(dim3, dim1) ; + \t\tvar3:foo = variable ; + \tint64 numbers(dim3) ; // global attributes: - :unicode_attr = ba® ; - :string_attr = bar ; + \t:unicode_attr = ba® ; + \t:string_attr = bar ; }''') actual = buf.getvalue() self.assertEqual(expected, actual) @@ -685,6 +686,23 @@ def test_coords_merge_mismatched_shape(self): actual = orig_coords.merge(other_coords) self.assertDatasetIdentical(expected, actual) + def test_data_vars_properties(self): + ds = Dataset() + ds['foo'] = (('x',), [1.0]) + ds['bar'] = 2.0 + + self.assertEqual(set(ds.data_vars), {'foo', 'bar'}) + self.assertIn('foo', ds.data_vars) + self.assertNotIn('x', ds.data_vars) + self.assertDataArrayIdentical(ds['foo'], ds.data_vars['foo']) + + expected = dedent("""\ + Data variables: + foo (x) float64 1.0 + bar float64 2.0""") + actual = repr(ds.data_vars) + 
self.assertEqual(expected, actual) + def test_equals_and_identical(self): data = create_test_data(seed=42) self.assertTrue(data.equals(data)) @@ -3101,7 +3119,7 @@ def test_filter_by_attrs(self): ds = Dataset({'temperature_0': (['t'], [0], temp0), 'temperature_10': (['t'], [0], temp10), 'precipitation': (['t'], [0], precip)}, - coords={'time': (['t'], [0], dict(axis='T'))}) + coords={'time': (['t'], [0], dict(axis='T'))}) # Test return empty Dataset. ds.filter_by_attrs(standard_name='invalid_standard_name')