-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Feature/pickle rasterio #2131
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature/pickle rasterio #2131
Changes from all commits
4319aac
fb8b3d7
2b7176b
c8de3a5
45f0aeb
99c9661
55a3abc
8b286c0
6669035
097e264
822a080
40c15a6
78ffcb4
35520c0
8d7c768
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
import traceback | ||
import warnings | ||
from collections import Mapping, OrderedDict | ||
from functools import partial | ||
|
||
import numpy as np | ||
|
||
|
@@ -507,3 +508,30 @@ def assert_open(self): | |
if not self._isopen: | ||
raise AssertionError('internal failure: file must be open ' | ||
'if `autoclose=True` is used.') | ||
|
||
|
||
class PickleByReconstructionWrapper(object): | ||
|
||
def __init__(self, opener, file, mode='r', **kwargs): | ||
self.opener = partial(opener, file, mode=mode, **kwargs) | ||
self.mode = mode | ||
self._ds = None | ||
|
||
@property | ||
def value(self): | ||
self._ds = self.opener() | ||
return self._ds | ||
|
||
def __getstate__(self): | ||
state = self.__dict__.copy() | ||
del state['_ds'] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shouldn't we close the file here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It might be used after it is pickled. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah OK, maybe I misunderstood. I thought the purpose of this wrapper was to dereference (and close) the file before 'dump()' and re-open it after There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The basic idea is that most open file handles can't be pickled so we need to provide a mechanism to remove the existing handle and generate a new one in the dump/load steps in the pickling. In most cases, we do want to keep the original file open. |
||
if self.mode == 'w': | ||
# file has already been created, don't override when restoring | ||
state['mode'] = 'a' | ||
return state | ||
|
||
def __setstate__(self, state): | ||
self.__dict__.update(state) | ||
|
||
def close(self): | ||
self._ds.close() |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,8 @@ | |
from xarray import ( | ||
DataArray, Dataset, backends, open_dataarray, open_dataset, open_mfdataset, | ||
save_mfdataset) | ||
from xarray.backends.common import robust_getitem | ||
from xarray.backends.common import (robust_getitem, | ||
PickleByReconstructionWrapper) | ||
from xarray.backends.netCDF4_ import _extract_nc4_variable_encoding | ||
from xarray.backends.pydap_ import PydapDataStore | ||
from xarray.core import indexing | ||
|
@@ -2724,7 +2725,8 @@ def create_tmp_geotiff(nx=4, ny=3, nz=3, | |
# yields a temporary geotiff file and a corresponding expected DataArray | ||
import rasterio | ||
from rasterio.transform import from_origin | ||
with create_tmp_file(suffix='.tif') as tmp_file: | ||
with create_tmp_file(suffix='.tif', | ||
allow_cleanup_failure=ON_WINDOWS) as tmp_file: | ||
# allow 2d or 3d shapes | ||
if nz == 1: | ||
data_shape = ny, nx | ||
|
@@ -2996,6 +2998,14 @@ def test_chunks(self): | |
ex = expected.sel(band=1).mean(dim='x') | ||
assert_allclose(ac, ex) | ||
|
||
def test_pickle_rasterio(self): | ||
# regression test for https://github.com/pydata/xarray/issues/2121 | ||
with create_tmp_geotiff() as (tmp_file, expected): | ||
with xr.open_rasterio(tmp_file) as rioda: | ||
temp = pickle.dumps(rioda) | ||
with pickle.loads(temp) as actual: | ||
assert_equal(actual, rioda) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does it make sense to also add an integration test reading rasterio data with dask.distributed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done in 55a3abc. |
||
|
||
def test_ENVI_tags(self): | ||
rasterio = pytest.importorskip('rasterio', minversion='1.0a') | ||
from rasterio.transform import from_origin | ||
|
@@ -3260,3 +3270,29 @@ def test_dataarray_to_netcdf_no_name_pathlib(self): | |
|
||
with open_dataarray(tmp) as loaded_da: | ||
assert_identical(original_da, loaded_da) | ||
|
||
|
||
def test_pickle_reconstructor(): | ||
|
||
lines = ['foo bar spam eggs'] | ||
|
||
with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as tmp: | ||
with open(tmp, 'w') as f: | ||
f.writelines(lines) | ||
|
||
obj = PickleByReconstructionWrapper(open, tmp) | ||
|
||
assert obj.value.readlines() == lines | ||
|
||
p_obj = pickle.dumps(obj) | ||
obj.value.close() # for windows | ||
obj2 = pickle.loads(p_obj) | ||
|
||
assert obj2.value.readlines() == lines | ||
|
||
# roundtrip again to make sure we can fully restore the state | ||
p_obj2 = pickle.dumps(obj2) | ||
obj2.value.close() # for windows | ||
obj3 = pickle.loads(p_obj2) | ||
|
||
assert obj3.value.readlines() == lines |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be nice to add a unit test verifies that this works properly independently of any concrete datastore.
Maybe something simple with
open()
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done in 8b286c0