11
11
import numpy as np
12
12
13
13
from .backends .api import open_dataset as _open_dataset
14
- from .backends .rasterio_ import open_rasterio
14
+ from .backends .rasterio_ import open_rasterio as _open_rasterio
15
15
from .core .dataarray import DataArray
16
16
from .core .dataset import Dataset
17
17
18
-
19
- def _open_rasterio (path , engine = None , ** kwargs ):
20
- data = open_rasterio (path , ** kwargs )
21
- name = data .name if data .name is not None else "data"
22
- return data .to_dataset (name = name )
23
-
24
-
25
18
_default_cache_dir_name = "xarray_tutorial_data"
26
19
base_url = "https://github.com/pydata/xarray-data"
27
20
version = "master"
28
21
29
22
30
- external_urls = {
31
- "RGB.byte" : (
32
- "rasterio" ,
33
- "https://github.com/mapbox/rasterio/raw/master/tests/data/RGB.byte.tif" ,
34
- ),
35
- }
36
- overrides = {
37
- "rasterio" : _open_rasterio ,
23
def _construct_cache_dir(path):
    """
    Normalize the ``cache_dir`` argument into a value to hand to pooch.

    Parameters
    ----------
    path : str, path-like or None
        Directory requested by the caller. ``None`` selects the default
        cache location.

    Returns
    -------
    path
        ``os.fspath(path)`` for any ``os.PathLike`` object (this includes
        ``pathlib.Path``), the result of
        ``pooch.os_cache(_default_cache_dir_name)`` when ``path`` is None,
        and ``path`` unchanged otherwise.
    """
    if path is None:
        # Imported lazily: pooch is only needed to work out the default
        # location, so an explicit path can be normalized without it.
        import pooch

        return pooch.os_cache(_default_cache_dir_name)

    # Accept every object implementing the ``__fspath__`` protocol rather
    # than only ``pathlib.Path``, matching the "path-like" wording of the
    # public docstrings.
    if isinstance(path, os.PathLike):
        return os.fspath(path)

    return path
32
+
33
+
34
+ external_urls = {} # type: dict
35
+ external_rasterio_urls = {
36
+ "RGB.byte" : "https://github.com/mapbox/rasterio/raw/1.2.1/tests/data/RGB.byte.tif" ,
37
+ "shade" : "https://github.com/mapbox/rasterio/raw/1.2.1/tests/data/shade.tif" ,
38
38
}
39
39
40
40
41
41
# idea borrowed from Seaborn
42
42
def open_dataset (
43
43
name ,
44
- engine = None ,
45
44
cache = True ,
46
45
cache_dir = None ,
47
46
** kws ,
@@ -51,31 +50,27 @@ def open_dataset(
51
50
52
51
If a local copy is found then always use that to avoid network traffic.
53
52
53
+ Available datasets:
54
+
55
+ * ``"air_temperature"``: NCEP reanalysis subset
56
+ * ``"rasm"``: Output of the Regional Arctic System Model (RASM)
57
+ * ``"ROMS_example"``: Regional Ocean Model System (ROMS) output
58
+ * ``"tiny"``: small synthetic dataset with a 1D data variable
59
+ * ``"era5-2mt-2019-03-uk.grib"``: ERA5 temperature data over the UK
60
+ * ``"eraint_uvz"``: data from ERA-Interim reanalysis, monthly averages of upper level data
61
+
54
62
Parameters
55
63
----------
56
64
name : str
57
65
Name of the file containing the dataset.
58
66
e.g. 'air_temperature'
59
- engine : str, optional
60
- The engine to use.
61
67
cache_dir : path-like, optional
62
68
The directory in which to search for and write cached data.
63
69
cache : bool, optional
64
70
If True, then cache data locally for use on subsequent calls
65
71
**kws : dict, optional
66
72
Passed to xarray.open_dataset
67
73
68
- Notes
69
- -----
70
- Available datasets:
71
-
72
- * ``"air_temperature"``
73
- * ``"rasm"``
74
- * ``"ROMS_example"``
75
- * ``"tiny"``
76
- * ``"era5-2mt-2019-03-uk.grib"``
77
- * ``"RGB.byte"``: example rasterio file from https://github.com/mapbox/rasterio
78
-
79
74
See Also
80
75
--------
81
76
xarray.open_dataset
@@ -85,15 +80,12 @@ def open_dataset(
85
80
except ImportError :
86
81
raise ImportError ("using the tutorial data requires pooch" )
87
82
88
- if isinstance (cache_dir , pathlib .Path ):
89
- cache_dir = os .fspath (cache_dir )
90
- elif cache_dir is None :
91
- cache_dir = pooch .os_cache (_default_cache_dir_name )
83
+ logger = pooch .get_logger ()
84
+ logger .setLevel ("WARNING" )
92
85
86
+ cache_dir = _construct_cache_dir (cache_dir )
93
87
if name in external_urls :
94
- engine_ , url = external_urls [name ]
95
- if engine is None :
96
- engine = engine_
88
+ url = external_urls [name ]
97
89
else :
98
90
# process the name
99
91
default_extension = ".nc"
@@ -103,17 +95,78 @@ def open_dataset(
103
95
104
96
url = f"{ base_url } /raw/{ version } /{ path .name } "
105
97
106
- _open = overrides .get (engine , _open_dataset )
107
98
# retrieve the file
108
99
filepath = pooch .retrieve (url = url , known_hash = None , path = cache_dir )
109
- ds = _open (filepath , engine = engine , ** kws )
100
+ ds = _open_dataset (filepath , ** kws )
110
101
if not cache :
111
102
ds = ds .load ()
112
103
pathlib .Path (filepath ).unlink ()
113
104
114
105
return ds
115
106
116
107
108
def open_rasterio(
    name,
    engine=None,
    cache=True,
    cache_dir=None,
    **kws,
):
    """
    Open a rasterio dataset from the online repository (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Available datasets:

    * ``"RGB.byte"``: TIFF file derived from USGS Landsat 7 ETM imagery.
    * ``"shade"``: TIFF file derived from USGS SRTM 90 data

    ``RGB.byte`` and ``shade`` are downloaded from the ``rasterio`` repository [1]_.

    Parameters
    ----------
    name : str
        Name of the file containing the dataset.
        e.g. 'RGB.byte'
    engine : optional
        Not used by this function; it is accepted and ignored.
    cache_dir : path-like, optional
        The directory in which to search for and write cached data.
    cache : bool, optional
        If True, then cache data locally for use on subsequent calls.
        If False, the data is loaded into memory and the downloaded file
        is deleted before returning.
    **kws : dict, optional
        Passed to xarray.open_rasterio

    Returns
    -------
    The object returned by xarray.open_rasterio for the downloaded file
    (the result of its ``load()`` method when ``cache`` is False).

    Raises
    ------
    ImportError
        If ``pooch`` is not installed.
    ValueError
        If ``name`` is not one of the available rasterio datasets.

    See Also
    --------
    xarray.open_rasterio

    References
    ----------
    .. [1] https://github.com/mapbox/rasterio
    """
    try:
        import pooch
    except ImportError as err:
        # Chain the original failure so the underlying reason stays visible.
        raise ImportError("using the tutorial data requires pooch") from err

    # Raise pooch's logger threshold so only warnings and errors are emitted.
    logger = pooch.get_logger()
    logger.setLevel("WARNING")

    cache_dir = _construct_cache_dir(cache_dir)
    url = external_rasterio_urls.get(name)
    if url is None:
        raise ValueError(f"unknown rasterio dataset: {name}")

    # retrieve the file
    filepath = pooch.retrieve(url=url, known_hash=None, path=cache_dir)
    arr = _open_rasterio(filepath, **kws)
    if not cache:
        # The data must be in memory before the backing file is removed.
        arr = arr.load()
        pathlib.Path(filepath).unlink()

    return arr
168
+
169
+
117
170
def load_dataset (* args , ** kwargs ):
118
171
"""
119
172
Open, load into memory, and close a dataset from the online repository
0 commit comments