Skip to content

Commit

Permalink
Concept changes for Zarr v3 support
Browse files Browse the repository at this point in the history
  • Loading branch information
folterj committed Jul 3, 2024
1 parent a3f0fcf commit d8f1e95
Show file tree
Hide file tree
Showing 8 changed files with 122 additions and 65 deletions.
1 change: 1 addition & 0 deletions OmeSliCC/OmeSource.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class OmeSource:
position: list
"""source position information"""

default_properties_order = 'xyzct'
default_physical_unit = 'µm'

def __init__(self):
Expand Down
3 changes: 2 additions & 1 deletion OmeSliCC/OmeZarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def write(self, sources, tile_size=[], compression=[],

compressor, compression_filters = create_compression_filter(compression)
storage_options = {'dimension_separator': '/', 'chunks': tile_size}
ome_version = '0.4'
# Zarr V3 testing
#storage_options = {'chunks': tile_size}
if compressor is not None:
Expand Down Expand Up @@ -58,7 +59,7 @@ def write(self, sources, tile_size=[], compression=[],
multi_metadata.append(meta)
if multiple_images:
zarr_root.attrs['multiscales'] = multi_metadata
zarr_root.attrs['omero'] = create_channel_metadata(sources[0])
zarr_root.attrs['omero'] = create_channel_metadata(sources[0], ome_version)

def write_dataset(self, zarr_root, data, source,
npyramid_add=0, pyramid_downsample=2, translation=[]):
Expand Down
26 changes: 18 additions & 8 deletions OmeSliCC/OmeZarrSource.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from OmeSliCC.OmeSource import OmeSource
from OmeSliCC.color_conversion import hexrgb_to_rgba, int_to_rgba
from OmeSliCC.image_util import get_numpy_slicing, redimension_data
from OmeSliCC.util import reorder


class OmeZarrSource(OmeSource):
Expand All @@ -14,6 +15,12 @@ class OmeZarrSource(OmeSource):
"""original filename / URL"""
levels: list
"""list of all image arrays for different sizes"""
level_scales: list
"""list of all image (xy) scales"""
shapes: list
"""list of image shapes"""
chunk_shapes: list
"""list of image chunk shapes"""

def __init__(self, filename: str,
source_pixel_size: list = None,
Expand All @@ -23,6 +30,9 @@ def __init__(self, filename: str,
super().__init__()

self.levels = []
self.level_scales = []
self.shapes = []
self.chunk_shapes = []
nchannels = 1
try:
location = parse_url(filename)
Expand All @@ -41,18 +51,19 @@ def __init__(self, filename: str,
self.dimension_order = ''.join([axis.get('name') for axis in axes])

for data in image_node.data:
#if isinstance(data, np.ndarray):
# data = da.from_array(data)
self.levels.append(data)

xyzct = [1, 1, 1, 1, 1]
for i, n in enumerate(data.shape):
xyzct_index = 'xyzct'.index(self.dimension_order[i])
xyzct_index = self.default_properties_order.index(self.dimension_order[i])
xyzct[xyzct_index] = n
self.sizes_xyzct.append(xyzct)
self.sizes.append((xyzct[0], xyzct[1]))
self.pixel_types.append(data.dtype)
self.pixel_nbits.append(data.dtype.itemsize * 8)
self.level_scales.append(np.divide(self.sizes_xyzct[0][0], xyzct[0]))
self.shapes.append(np.flip(reorder(data.shape, self.dimension_order, self.default_properties_order)))
self.chunk_shapes.append(np.flip(reorder(data.chunksize, self.dimension_order, self.default_properties_order)))
nchannels = xyzct[3]
except Exception as e:
raise FileNotFoundError(f'Read error: {e}')
Expand All @@ -72,14 +83,13 @@ def _find_metadata(self):

units = [axis.get('unit', '') for axis in metadata.get('axes', [])]

scale1 = [0, 0, 0, 0, 0]
scale1 = [1, 1, 1, 1, 1]
# get pixelsize using largest/first scale
transform = self.metadata.get('coordinateTransformations', [])
if transform:
transform = transform[0]
for transform_element in transform:
if 'scale' in transform_element:
scale1 = transform_element['scale']
for transform1 in transform[0]:
if transform1['type'] == 'scale':
scale1 = transform1['scale']
for axis in 'xyz':
if axis in axes:
index = axes.index(axis)
Expand Down
93 changes: 56 additions & 37 deletions OmeSliCC/Zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,95 +3,114 @@
import pathlib
import shutil
import zarr
from ome_zarr.scale import Scaler

from OmeSliCC.image_util import *
from OmeSliCC.ome_zarr_util import *
from OmeSliCC.util import *


class Zarr:
def __init__(self, filename):
def __init__(self, filename, ome=None, v3=False):
self.filename = filename
self.ome = ('ome' == self.filename.split('.')[1].lower())
if ome is not None:
self.ome = ome
else:
self.ome = ('ome' == self.filename.split('.')[1].lower())
self.v3 = v3
self.data = []

def create(self, source, tile_size=None, npyramid_add=0, pyramid_downsample=2, compression=[],
v3=False):
def create(self, source, shapes=[], chunk_shapes=[], level_scales=[],
tile_size=None, npyramid_add=0, pyramid_downsample=2, compression=[]):
# create empty dataset
dimension_order = source.get_dimension_order()
self.dimension_order = dimension_order
nlevels = max(1 + npyramid_add, len(shapes))
self.npyramid_add = npyramid_add
self.pyramid_downsample = pyramid_downsample
if v3:
self.level_scales = level_scales
if self.v3:
import zarrita
store_path = zarrita.store.make_store_path(self.filename)
if os.path.exists(self.filename):
shutil.rmtree(str(store_path.store.root))
shutil.rmtree(self.filename)
self.zarr_root = zarrita.Group.create(store=store_path, exists_ok=True)
ome_version = '0.5-dev1'
else:
file_url = pathlib.Path(self.filename, mode='w').as_uri()
self.zarr_root = zarr.open_group(store=file_url, mode='w', storage_options={'dimension_separator': '/'})
ome_version = '0.4'
xyzct = source.get_size_xyzct()
self.scaler = Scaler(downscale=pyramid_downsample, max_layer=npyramid_add)
shape0 = [xyzct['xyzct'.index(dimension)] for dimension in dimension_order]
dtype = source.pixel_types[0]
pixel_size_um = source.get_pixel_size_micrometer()
scale = 1
datasets = []
if tile_size:
if tile_size and len(tile_size) < 5:
if isinstance(tile_size, int):
tile_size = [tile_size] * 2
elif len(tile_size) == 1:
tile_size = tile_size * 2
tile_size = [1, 1, 1] + list(np.flip(tile_size))
for pathi in range(1 + npyramid_add):
shape = calc_shape_scale(shape0, dimension_order, scale)
if v3:
tile_size = list(np.flip(tile_size))
while len(tile_size) < 5:
tile_size = [1] + tile_size
for level in range(nlevels):
if len(shapes) > 0:
shape = shapes[level]
else:
shape = scale_dimensions_xy(shape0, dimension_order, scale)
if len(chunk_shapes) > 0:
chunk_shape = chunk_shapes[level]
else:
chunk_shape = np.min([tile_size, shape], axis=0)
if self.v3:
import zarrita
shape = np.array(shape).tolist() # convert to basic int
tile_size = np.array(tile_size).tolist() # convert to basic int
shape = np.array(shape).tolist() # convert to basic int
chunk_shape = np.array(chunk_shape).tolist() # convert to basic int
codecs = create_compression_codecs(compression)
dataset = self.zarr_root.create_array(str(pathi), shape=shape, chunk_shape=tile_size, dtype=dtype,
dataset = self.zarr_root.create_array(str(level), shape=shape, chunk_shape=chunk_shape, dtype=dtype,
codecs=codecs)
else:
chunk_shape = tile_size
compressor, compression_filters = create_compression_filter(compression)
dataset = self.zarr_root.create_dataset(str(pathi), shape=shape, chunks=tile_size, dtype=dtype,
dataset = self.zarr_root.create_dataset(str(level), shape=shape, chunks=chunk_shape, dtype=dtype,
compressor=compressor, filters=compression_filters)
self.data.append(dataset)
# used for ome metadata:
datasets.append({
'path': str(pathi),
'path': str(level),
'coordinateTransformations': create_transformation_metadata(dimension_order, pixel_size_um, scale)
})
scale /= pyramid_downsample

if self.ome:
metadata = {
'version': '0.4',
multiscales = {
'version': ome_version,
'axes': create_axes_metadata(dimension_order),
'name': get_filetitle(source.source_reference),
'datasets': datasets,
}
metadata = {'multiscales': [multiscales], 'omero': create_channel_metadata(source, ome_version)}
if self.v3:
self.zarr_root.update_attributes(metadata)
else:
self.zarr_root.attrs = metadata

self.zarr_root.attrs['multiscales'] = [metadata]
self.zarr_root.attrs['omero'] = create_channel_metadata(source)

def get(self, level, x0=0, y0=0, x1=-1, y1=-1):
data = self.data[level][..., y0:y1, x0:x1]
def get(self, level, **slicing):
    """Read a region from one stored level.

    The keyword arguments in ``slicing`` are forwarded, together with the
    stored dimension order, to ``get_numpy_slicing``; the index it returns
    is applied to ``self.data[level]`` and the result is returned.
    """
    index = get_numpy_slicing(self.dimension_order, **slicing)
    return self.data[level][index]

def set(self, data, x0=0, y0=0, x1=0, y1=0):
if y1 <= 0:
y1 = data.shape[-2]
if x1 <= 0:
x1 = data.shape[-1]
def set(self, data, **slicing):
scale = 1
for pathi in range(1 + self.npyramid_add):
sx0, sy0, sx1, sy1 = np.round(np.multiply([x0, y0, x1, y1], scale)).astype(int)
if scale != 1:
new_size = sx1 - sx0, sy1 - sy0
data1 = image_resize(data, new_size, dimension_order=self.dimension_order)
else:
data1 = data
#self.data[pathi][..., sy0:sy1, sx0:sx1] = data1
self.data[pathi] = data1
for level, sized_data in enumerate(self.scaler.nearest(data)):
resized_slicing = scale_dimensions_dict(slicing, scale)
slices = get_numpy_slicing(self.dimension_order, **resized_slicing)
self.data[level][slices] = np.asarray(sized_data)
scale /= self.pyramid_downsample

def set_level(self, level, data, **slicing):
    """Write ``data`` into a single stored level.

    The x/y entries of ``slicing`` are rescaled by the reciprocal of
    ``self.level_scales[level]`` before being converted to an index with
    ``get_numpy_slicing``; ``data`` is converted with ``np.asarray`` and
    assigned to that region of ``self.data[level]``.
    """
    level_factor = 1 / self.level_scales[level]
    level_slicing = scale_dimensions_dict(slicing, level_factor)
    index = get_numpy_slicing(self.dimension_order, **level_slicing)
    self.data[level][index] = np.asarray(data)
6 changes: 3 additions & 3 deletions OmeSliCC/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,16 +229,16 @@ def save_image_as_ome_zarr(source: OmeSource, data: np.ndarray, output_filename:


def save_image_as_zarr(source: OmeSource, data: np.ndarray, output_filename: str, output_params: dict,
v3: bool = False):
ome: bool = None, v3: bool = False):
# ome-zarr: https://ngff.openmicroscopy.org/latest/
tile_size = output_params.get('tile_size')
compression = output_params.get('compression')
npyramid_add = output_params.get('npyramid_add', 0)
pyramid_downsample = output_params.get('pyramid_downsample')

zarr = Zarr(output_filename)
zarr = Zarr(output_filename, ome=ome, v3=v3)
zarr.create(source, tile_size=tile_size, npyramid_add=npyramid_add, pyramid_downsample=pyramid_downsample,
compression=compression, v3=v3)
compression=compression)
zarr.set(data)


Expand Down
21 changes: 16 additions & 5 deletions OmeSliCC/ome_zarr_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def create_transformation_metadata(dimension_order, pixel_size_um, scale, transl
return metadata


def create_channel_metadata(source):
def create_channel_metadata(source, ome_version):
channels = source.get_channels()
nchannels = source.get_nchannels()

Expand All @@ -72,18 +72,29 @@ def create_channel_metadata(source):
omezarr_channels.append(channel)

metadata = {
'version': '0.4',
'version': ome_version,
'channels': omezarr_channels,
}
return metadata


def calc_shape_scale(shape0, dimension_order, scale):
def scale_dimensions_xy(shape0, dimension_order, scale):
shape = []
if scale == 1:
return shape0
for shape1, dimension in zip(shape0, dimension_order):
if dimension in ['x', 'y']:
shape1 = int(round(shape1 * scale))
if dimension[0] in ['x', 'y']:
shape1 = int(shape1 * scale)
shape.append(shape1)
return shape


def scale_dimensions_dict(shape0, scale):
    """Scale the x/y entries of a mapping by ``scale``.

    Values whose key starts with 'x' or 'y' are multiplied by ``scale`` and
    truncated with ``int``; every other entry is carried over unchanged, in
    the original key order. When ``scale`` equals 1 the input mapping itself
    is returned rather than a copy.
    """
    if scale == 1:
        return shape0
    return {
        key: int(value * scale) if key[0] in ['x', 'y'] else value
        for key, value in shape0.items()
    }
11 changes: 11 additions & 0 deletions OmeSliCC/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,17 @@ def ensure_list(x) -> list:
return [x]


def reorder(items: list, old_order: str, new_order: str, default_value: int = 0) -> list:
    """Rearrange ``items`` from the labelling ``old_order`` into ``new_order``.

    For each label of ``new_order`` the result holds the item found at that
    label's position in ``old_order``; labels that do not occur in
    ``old_order`` contribute ``default_value`` instead.
    """
    return [
        items[old_order.index(label)] if label in old_order else default_value
        for label in new_order
    ]


def filter_dict(dict0: dict) -> dict:
new_dict = {}
for key, value0 in dict0.items():
Expand Down
26 changes: 15 additions & 11 deletions tests/ome_zarr_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@
import numpy as np
from tqdm import tqdm

from OmeSliCC.conversion import create_source, save_image_as_zarr
from OmeSliCC.image_util import *
from OmeSliCC.Zarr import Zarr
from OmeSliCC.OmeZarrSource import OmeZarrSource
from OmeSliCC.Zarr import Zarr


def create_zarr(source, output_filename, tile_size, npyramid_add, pyramid_downsample):
Expand Down Expand Up @@ -33,30 +32,35 @@ def open_omezarr_source(filename):
show_image(image)


def save_zarr_v3(source, data, filename, output_params):
save_image_as_zarr(source, data, filename, output_params, v3=True)
def convert_ome_zarr_v2_to_v3(filename):
    """Copy every level of an OME-Zarr source into a store created with v3=True.

    Opens ``filename`` as an ``OmeZarrSource``, creates a ``Zarr`` target
    using the source's shapes, chunk shapes and level scales, then writes
    each array returned by ``get_source_dask()`` to the matching level.

    NOTE: the target path and compression are not parameters; they are read
    from the module-level names ``output_filename`` and ``compression``,
    which must be defined before this function is called.
    """
    reader = OmeZarrSource(filename)
    writer = Zarr(output_filename, ome=True, v3=True)
    shapes, chunk_shapes, level_scales = reader.shapes, reader.chunk_shapes, reader.level_scales
    print(shapes, chunk_shapes, level_scales)
    writer.create(reader, shapes=shapes, chunk_shapes=chunk_shapes, level_scales=level_scales,
                  compression=compression)
    for level_index, level_data in enumerate(reader.get_source_dask()):
        writer.set_level(level_index, level_data)


if __name__ == '__main__':
#filename = 'E:/Personal/Crick/slides/TCGA_KIRC/0f450938-5604-4af6-8783-c385ea647569/TCGA-A3-3358-01Z-00-DX1.1bd1c720-f6db-4837-8f83-e7476dd2b0a3.svs'
#filename = 'E:/Personal/Crick/slides/test_images/zarr test.zarr'
filename = 'https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr'
source = create_source(filename, {})
w, h = source.get_size()
filename = 'D:/slides/6001240.zarr'
#filename = 'https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0062A/6001240.zarr'
output_filename = 'D:/slides/test/' + os.path.basename(filename)
npyramid_add = 4
pyramid_downsample = 2
tile_size = 16
compression = None
output_params = {
'tile_size': [tile_size, tile_size],
'tile_size': tile_size,
'npyramid_add': npyramid_add,
'pyramid_downsample': pyramid_downsample,
'compression': compression
}

#create_zarr(source, output_filename, tile_size, npyramid_add, pyramid_downsample)
#open_omezarr_source(output_filename)

data = source.get_source_dask()[0]
save_zarr_v3(source, data, output_filename, output_params)
convert_ome_zarr_v2_to_v3(filename)

print('done')

0 comments on commit d8f1e95

Please sign in to comment.