-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support Zlib compression and shuffle filter for local storage
This change adds support for compressed and filtered data for local storage. Data in S3 will be addressed separately. The compression and filters arguments passed to reduce_chunk are actually numcodecs.abc.Codec instances, so we can use them as a black box to decode the compression or filter. Currently we are testing Zlib compression algorithm as well as the HDF5 byte shuffle filter. It's possible that other compression algorithms and filters will "just work" due to using the numcodecs.abc.Codec interface to decode the data, but they have not been tested. Closes: #118
- Loading branch information
1 parent
42e6bce
commit 6cec850
Showing
4 changed files
with
85 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import os | ||
import pytest | ||
|
||
from activestorage.active import Active | ||
from activestorage.config import * | ||
from activestorage.dummy_data import make_compressed_ncdata | ||
|
||
import utils | ||
|
||
|
||
def create_compressed_dataset(tmp_path: str, compression: str, shuffle: bool):
    """
    Build a small netCDF test dataset that is compressed and optionally
    byte-shuffled, then push it to the configured storage backend.

    Parameters
    ----------
    tmp_path : pytest tmp_path fixture (a pathlib.Path-like directory).
    compression : name of the compression algorithm (e.g. 'zlib').
    shuffle : whether the HDF5 byte-shuffle filter is applied.

    Returns
    -------
    The path/URI of the dataset in storage.
    """
    local_path = str(tmp_path / "test_compression.nc")
    dataset = make_compressed_ncdata(
        filename=local_path, compression=compression, shuffle=shuffle
    )

    # Sanity check: confirm the 'data' variable really was written with the
    # requested compression algorithm and shuffle setting.
    applied_filters = dataset[0].variables['data'].filters()
    assert applied_filters[compression]
    assert applied_filters['shuffle'] == shuffle

    stored_path = utils.write_to_storage(local_path)
    if USE_S3:
        # Once uploaded to object storage the local copy is redundant.
        os.remove(local_path)
    return stored_path
|
||
|
||
@pytest.mark.skipif(USE_S3, reason="Compression and filtering not supported in S3 yet")
@pytest.mark.parametrize('compression', ['zlib'])
@pytest.mark.parametrize('shuffle', [False, True])
def test_compression_and_filters(tmp_path: str, compression: str, shuffle: bool):
    """
    Verify that an Active reduction produces the expected result when the
    underlying dataset is compressed and optionally shuffled.
    """
    dataset_path = create_compressed_dataset(tmp_path, compression, shuffle)

    # Run a version-1 (active) 'min' reduction over a small hyperslab.
    active = Active(dataset_path, 'data', utils.get_storage_type())
    active._version = 1
    active._method = "min"

    # The minimum over this slice of the known test data is 740.0.
    assert active[0:2,4:6,7:9] == 740.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters