|
| 1 | + |
| 2 | +Working with Multidimensional Coordinates |
| 3 | +========================================= |
| 4 | + |
| 5 | +Author: `Ryan Abernathey <http://github.com/rabernat>`__ |
| 6 | + |
| 7 | +Many datasets have *physical coordinates* which differ from their |
| 8 | +*logical coordinates*. Xarray provides several ways to plot and analyze |
| 9 | +such datasets. |
| 10 | + |
| 11 | +.. code:: python |
| 12 | +
|
| 13 | + %matplotlib inline |
| 14 | + import numpy as np |
| 15 | + import pandas as pd |
| 16 | + import xarray as xr |
| 17 | + import cartopy.crs as ccrs |
| 18 | + from matplotlib import pyplot as plt |
| 19 | + |
| 20 | + print("numpy version : ", np.__version__) |
| 21 | + print("pandas version : ", pd.__version__) |
| 22 | + print("xarray version : ", xr.version.version) |
| 23 | +
|
| 24 | +
|
| 25 | +.. parsed-literal:: |
| 26 | +
|
| 27 | + ('numpy version : ', '1.11.0') |
| 28 | + ('pandas version : ', u'0.18.0') |
| 29 | + ('xarray version : ', '0.7.2-32-gf957eb8') |
| 30 | +
|
| 31 | +
|
| 32 | +As an example, consider this dataset from the |
| 33 | +`xarray-data <https://github.com/pydata/xarray-data>`__ repository. |
| 34 | + |
| 35 | +.. code:: python |
| 36 | +
|
| 37 | + ! curl -L -O https://github.com/pydata/xarray-data/raw/master/RASM_example_data.nc |
| 38 | +
|
| 39 | +.. code:: python |
| 40 | +
|
| 41 | + ds = xr.open_dataset('RASM_example_data.nc') |
| 42 | + ds |
| 43 | +
|
| 44 | +
|
| 45 | +
|
| 46 | +
|
| 47 | +.. parsed-literal:: |
| 48 | +
|
| 49 | + <xarray.Dataset> |
| 50 | + Dimensions: (time: 36, x: 275, y: 205) |
| 51 | + Coordinates: |
| 52 | + * time (time) datetime64[ns] 1980-09-16T12:00:00 1980-10-17 ... |
| 53 | + yc (y, x) float64 16.53 16.78 17.02 17.27 17.51 17.76 18.0 18.25 ... |
| 54 | + xc (y, x) float64 189.2 189.4 189.6 189.7 189.9 190.1 190.2 190.4 ... |
| 55 | + * x (x) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 ... |
| 56 | + * y (y) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 ... |
| 57 | + Data variables: |
| 58 | + Tair (time, y, x) float64 nan nan nan nan nan nan nan nan nan nan ... |
| 59 | + Attributes: |
| 60 | + title: /workspace/jhamman/processed/R1002RBRxaaa01a/lnd/temp/R1002RBRxaaa01a.vic.ha.1979-09-01.nc |
| 61 | + institution: U.W. |
| 62 | + source: RACM R1002RBRxaaa01a |
| 63 | + output_frequency: daily |
| 64 | + output_mode: averaged |
| 65 | + convention: CF-1.4 |
| 66 | + references: Based on the initial model of Liang et al., 1994, JGR, 99, 14,415- 14,429. |
| 67 | + comment: Output from the Variable Infiltration Capacity (VIC) model. |
| 68 | + nco_openmp_thread_number: 1 |
| 69 | + NCO: 4.3.7 |
| 70 | + history: history deleted for brevity |
| 71 | +
|
| 72 | +
|
| 73 | +
|
| 74 | +In this example, the *logical coordinates* are ``x`` and ``y``, while |
| 75 | +the *physical coordinates* are ``xc`` and ``yc``, which represent the |
| 76 | +longitudes and latitudes of the data. |
| 77 | + |
| 78 | +.. code:: python |
| 79 | +
|
| 80 | + print(ds.xc.attrs) |
| 81 | + print(ds.yc.attrs) |
| 82 | +
|
| 83 | +
|
| 84 | +.. parsed-literal:: |
| 85 | +
|
| 86 | + OrderedDict([(u'long_name', u'longitude of grid cell center'), (u'units', u'degrees_east'), (u'bounds', u'xv')]) |
| 87 | + OrderedDict([(u'long_name', u'latitude of grid cell center'), (u'units', u'degrees_north'), (u'bounds', u'yv')]) |
| 88 | +
|
| 89 | +
|
| 90 | +Plotting |
| 91 | +-------- |
| 92 | + |
| 93 | +Let's examine these coordinate variables by plotting them. |
| 94 | + |
| 95 | +.. code:: python |
| 96 | +
|
| 97 | + fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(14,4)) |
| 98 | + ds.xc.plot(ax=ax1) |
| 99 | + ds.yc.plot(ax=ax2) |
| 100 | +
|
| 101 | +
|
| 102 | +
|
| 103 | +
|
| 104 | +.. parsed-literal:: |
| 105 | +
|
| 106 | + <matplotlib.collections.QuadMesh at 0x118688fd0> |
| 107 | +
|
| 108 | +
|
| 109 | +
|
| 110 | +.. parsed-literal:: |
| 111 | +
|
| 112 | + /Users/rpa/anaconda/lib/python2.7/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison |
| 113 | + if self._edgecolors == str('face'): |
| 114 | +
|
| 115 | +
|
| 116 | +
|
| 117 | +.. image:: multidimensional_coords_files/xarray_multidimensional_coords_8_2.png |
| 118 | + |
| 119 | + |
| 120 | +Note that the variables ``xc`` (longitude) and ``yc`` (latitude) are |
| 121 | +two-dimensional scalar fields. |
| 122 | + |
| 123 | +If we try to plot the data variable ``Tair``, by default we get the |
| 124 | +logical coordinates. |
| 125 | + |
| 126 | +.. code:: python |
| 127 | +
|
| 128 | + ds.Tair[0].plot() |
| 129 | +
|
| 130 | +
|
| 131 | +
|
| 132 | +
|
| 133 | +.. parsed-literal:: |
| 134 | +
|
| 135 | + <matplotlib.collections.QuadMesh at 0x11b6da890> |
| 136 | +
|
| 137 | +
|
| 138 | +
|
| 139 | +
|
| 140 | +.. image:: multidimensional_coords_files/xarray_multidimensional_coords_10_1.png |
| 141 | + |
| 142 | + |
| 143 | +In order to visualize the data on a conventional latitude-longitude |
| 144 | +grid, we can take advantage of xarray's ability to apply |
| 145 | +`cartopy <http://scitools.org.uk/cartopy/index.html>`__ map projections. |
| 146 | + |
| 147 | +.. code:: python |
| 148 | +
|
| 149 | + plt.figure(figsize=(14,6)) |
| 150 | + ax = plt.axes(projection=ccrs.PlateCarree()) |
| 151 | + ax.set_global() |
| 152 | + ds.Tair[0].plot.pcolormesh(ax=ax, transform=ccrs.PlateCarree(), x='xc', y='yc', add_colorbar=False) |
| 153 | + ax.coastlines() |
| 154 | + ax.set_ylim([0,90]); |
| 155 | +
|
| 156 | +
|
| 157 | +
|
| 158 | +.. image:: multidimensional_coords_files/xarray_multidimensional_coords_12_0.png |
| 159 | + |
| 160 | + |
| 161 | +Multidimensional Groupby |
| 162 | +------------------------ |
| 163 | + |
| 164 | +The above example allowed us to visualize the data on a regular |
| 165 | +latitude-longitude grid. But what if we want to do a calculation that |
| 166 | +involves grouping over one of these physical coordinates (rather than |
| 167 | +the logical coordinates), for example, calculating the mean temperature |
| 168 | +at each latitude? This can be achieved using xarray's ``groupby`` |
| 169 | +function, which accepts multidimensional variables. By default, |
| 170 | +``groupby`` will use every unique value in the variable, which is |
| 171 | +probably not what we want. Instead, we can use the ``groupby_bins`` |
| 172 | +function to specify the output coordinates of the group. |
| 173 | + |
| 174 | +.. code:: python |
| 175 | +
|
| 176 | + # define two-degree wide latitude bins |
| 177 | + lat_bins = np.arange(0,91,2) |
| 178 | + # define a label for each bin corresponding to the central latitude |
| 179 | + lat_center = np.arange(1,90,2) |
| 180 | + # group according to those bins and take the mean |
| 181 | + Tair_lat_mean = ds.Tair.groupby_bins('xc', lat_bins, labels=lat_center).mean() |
| 182 | + # plot the result |
| 183 | + Tair_lat_mean.plot() |
| 184 | +
|
| 185 | +
|
| 186 | +
|
| 187 | +
|
| 188 | +.. parsed-literal:: |
| 189 | +
|
| 190 | + [<matplotlib.lines.Line2D at 0x11cb92e90>] |
| 191 | +
|
| 192 | +
|
| 193 | +
|
| 194 | +
|
| 195 | +.. image:: multidimensional_coords_files/xarray_multidimensional_coords_14_1.png |
| 196 | + |
| 197 | + |
| 198 | +Note that the resulting coordinate for the ``groupby_bins`` operation |
| 199 | +got the ``_bins`` suffix appended: ``xc_bins``. This helps us distinguish |
| 200 | +it from the original multidimensional variable ``xc``. |
0 commit comments