Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stop fixed length strings from chunking #344

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 12 additions & 10 deletions python/src/odin_data/meta_writer/hdf5dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ def __init__(
cache=True,
block_size=1000000,
block_timeout=600,
**kwargs
**kwargs,
):
super(Int32HDF5Dataset, self).__init__(
name,
Expand All @@ -310,7 +310,7 @@ def __init__(
cache=cache,
block_size=block_size,
block_timeout=block_timeout,
**kwargs
**kwargs,
)


Expand All @@ -326,7 +326,7 @@ def __init__(
cache=True,
block_size=1000000,
block_timeout=600,
**kwargs
**kwargs,
):
super(Int64HDF5Dataset, self).__init__(
name,
Expand All @@ -337,7 +337,7 @@ def __init__(
cache=cache,
block_size=block_size,
block_timeout=block_timeout,
**kwargs
**kwargs,
)


Expand All @@ -352,7 +352,7 @@ def __init__(
cache=True,
block_size=1000000,
block_timeout=600,
**kwargs
**kwargs,
):
super(Float32HDF5Dataset, self).__init__(
name,
Expand All @@ -363,7 +363,7 @@ def __init__(
cache=cache,
block_size=block_size,
block_timeout=block_timeout,
**kwargs
**kwargs,
)


Expand All @@ -378,7 +378,7 @@ def __init__(
cache=True,
block_size=1000000,
block_timeout=600,
**kwargs
**kwargs,
):
super(Float64HDF5Dataset, self).__init__(
name,
Expand All @@ -389,7 +389,7 @@ def __init__(
cache=cache,
block_size=block_size,
block_timeout=block_timeout,
**kwargs
**kwargs,
)


Expand All @@ -406,7 +406,7 @@ def __init__(
cache=True,
block_size=1000000,
block_timeout=600,
**kwargs
**kwargs,
):
"""
Args:
Expand All @@ -424,8 +424,10 @@ def __init__(
cache=cache,
block_size=block_size,
block_timeout=block_timeout,
**kwargs
**kwargs,
)
if length and not maxshape:
self.maxshape = None

def prepare_data(self, data):
"""Prepare data ready to write to hdf5 dataset
Expand Down
9 changes: 6 additions & 3 deletions python/src/odin_data/meta_writer/meta_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

Matt Taylor, Diamond Light Source
"""

import os
from time import time
import logging
Expand All @@ -14,6 +15,7 @@
from odin_data import __version__
from odin_data.meta_writer.hdf5dataset import HDF5Dataset, Int64HDF5Dataset
from odin_data.util import construct_version_dict
from typing import Iterable

# Data message parameters
FRAME = "frame"
Expand All @@ -34,7 +36,6 @@


class MetaWriterConfig(object):

def __init__(self, sensor_shape):
"""
Args:
Expand Down Expand Up @@ -208,7 +209,7 @@ def _create_datasets(self, dataset_size):
chunks = dataset.maxshape
if isinstance(chunks, int):
chunks = (chunks,)
if None in chunks:
if isinstance(chunks, Iterable) and None in chunks:
chunks = None
self._logger.debug("Dataset {} chunking: {}".format(dataset.name, chunks))

Expand Down Expand Up @@ -571,7 +572,9 @@ def handle_stop_acquisition(self, header, _data):
return

self._logger.debug(
"%s | Received stopacquisition from endpoint %s", self._name, header[ENDPOINT]
"%s | Received stopacquisition from endpoint %s",
self._name,
header[ENDPOINT],
)
self._processes_running[self._endpoints.index(header[ENDPOINT])] = False

Expand Down
63 changes: 63 additions & 0 deletions python/tests/test_metawriter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from odin_data.meta_writer.meta_writer import MetaWriter
from odin_data.meta_writer.hdf5dataset import StringHDF5Dataset
import tempfile
from unittest.mock import MagicMock
import h5py
import pytest
from numpy import dtype


@pytest.fixture()
def meta_writer_with_temp_file():
    """Provide a MetaWriter whose ``_hdf5_file`` is an open h5py File backed by a temp file.

    Returns:
        MetaWriter: writer with a writable, freshly created HDF5 file attached.
    """
    temp_file = tempfile.TemporaryFile()
    # NOTE(review): the 4th argument is the MagicMock *class*, not an instance —
    # confirm MetaWriter only uses it as a callable/factory before relying on this.
    meta_writer = MetaWriter("", "", MagicMock(), MagicMock)
    # Use "w" to create a new HDF5 file inside the empty temp file. The original
    # "r+" mode requires an existing, valid HDF5 file and fails on an empty
    # file object (no HDF5 superblock to read).
    meta_writer._hdf5_file = h5py.File(temp_file, "w")
    return meta_writer


def write_meta_with_dataset(meta_writer: MetaWriter, dataset):
    """Install *dataset* as the writer's only dataset, create it, and return the HDF5 file.

    The dataset is registered under the empty-string key and created with a
    dataset size of 1; the writer's open h5py file is returned for inspection.
    """
    registered = {"": dataset}
    meta_writer._datasets = registered
    meta_writer._create_datasets(1)
    hdf5_file = meta_writer._hdf5_file
    return hdf5_file


def test_when_string_written_then_expected_type(meta_writer_with_temp_file):
    """A fixed-length StringHDF5Dataset should be created with a fixed S<length> dtype."""
    string_dataset = StringHDF5Dataset("test", length=100)
    meta_file = write_meta_with_dataset(meta_writer_with_temp_file, string_dataset)
    created = meta_file["test"]
    assert created is not None
    assert created.dtype == dtype("S100")


def test_given_data_set_with_no_chunks_but_max_shape_then_use_chunks(
    meta_writer_with_temp_file,
):
    """With a maxshape but no fixed length, the created dataset must be chunked."""
    growable = StringHDF5Dataset("test", maxshape=(100,))
    meta_file = write_meta_with_dataset(meta_writer_with_temp_file, growable)
    assert meta_file["test"].chunks is not None


def test_given_data_set_with_fixed_length_and_no_max_shape_then_has_no_max_shape_or_chunking(
    meta_writer_with_temp_file,
):
    """A fixed-length dataset with no maxshape is created contiguous (unchunked)."""
    fixed_length = StringHDF5Dataset("test", maxshape=None, length=100)
    meta_file = write_meta_with_dataset(meta_writer_with_temp_file, fixed_length)
    created = meta_file["test"]
    assert created.maxshape == (0,)
    assert created.chunks is None


def test_given_data_set_with_fixed_length_and_specified_max_shape_then_has_max_shape_and_chunking(
    meta_writer_with_temp_file,
):
    """An explicit maxshape overrides the fixed-length default: dataset stays chunked."""
    fixed_with_shape = StringHDF5Dataset("test", maxshape=(200,), length=100)
    meta_file = write_meta_with_dataset(meta_writer_with_temp_file, fixed_with_shape)
    created = meta_file["test"]
    assert created.maxshape == (200,)
    assert created.chunks is not None


def test_given_data_set_with_fixed_length_then_can_add_value(
    meta_writer_with_temp_file,
):
    """After creation, a fixed-length dataset accepts add_value/flush without error.

    NOTE(review): the value written is the int 10, not a string — presumably
    prepare_data handles the conversion; confirm against StringHDF5Dataset.
    """
    fixed_length = StringHDF5Dataset("test", maxshape=None, length=100)
    write_meta_with_dataset(meta_writer_with_temp_file, fixed_length)
    fixed_length.add_value(10)
    fixed_length.flush()