From d31ee700d34240b472b00af941cbaba39c68c2a0 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Mon, 18 Mar 2024 13:16:42 +0000 Subject: [PATCH 01/24] wip: start implementing downsampling method --- .../zarr/large_image_source_zarr/__init__.py | 51 ++++++++++++++++++- .../zarr/large_image_source_zarr/resample.py | 46 +++++++++++++++++ 2 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 sources/zarr/large_image_source_zarr/resample.py diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 8782f6111..59af071b6 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -1,3 +1,4 @@ +import itertools import math import os import shutil @@ -11,6 +12,7 @@ import numpy as np import packaging.version import zarr +from PIL import Image import large_image from large_image.cache_util import LruCacheMetaclass, methodcache @@ -19,6 +21,8 @@ from large_image.tilesource import FileTileSource from large_image.tilesource.utilities import _imageToNumpy, nearPowerOfTwo +from .resample import ResampleMethod + try: __version__ = _importlib_version(__name__) except PackageNotFoundError: @@ -652,12 +656,57 @@ def crop(self, value): raise TileSourceError(msg) self._crop = (x, y, w, h) + def _generateDownsampledLevels(self, resample_method): + self._checkEditable() + current_arrays = dict(self._zarr.arrays()) + if 'root' not in current_arrays: + msg = 'No root data found, cannot generate lower resolution levels.' + raise TileSourceError(msg) + if 'x' not in self._axes or 'y' not in self._axes: + msg = 'Data must have an X axis and Y axis to generate lower resolution levels.' + raise TileSourceError(msg) + + x = self._axes.get('x') + y = self._axes.get('y') + + current_data = current_arrays['root'] + current_shape = list(current_data.shape) + for level in range(self.levels): + current_shape[x] = int(current_shape[x] / 2) + current_shape[y] = int(current_shape[y] / 2) + resized_data = np.empty(current_shape) + + if resample_method in [ + ResampleMethod.PIL_BICUBIC, + ResampleMethod.PIL_BILINEAR, + ResampleMethod.PIL_BOX, + ResampleMethod.PIL_HAMMING, + ResampleMethod.PIL_LANCZOS, + ResampleMethod.PIL_NEAREST, + ]: + frame_dims = current_shape[:-3] + frame_shape = current_shape[-3:-1] + # TODO: for multiband (>4) images, split by band axis as well + # TODO: for single band images, use bilinear? explore this! + for frame_ind in itertools.product(*[range(f) for f in frame_dims]): + img = Image.fromarray(current_data[(*frame_ind,)]) + resized_img = img.resize(frame_shape, resample=resample_method.value) + resized_data[(*frame_ind,)] = np.transpose(np.array(resized_img), (1, 0, 2)) + else: + # TODO: Implement non-PIL resample methods + print(resample_method) + + print(current_shape, resized_data.shape) + # TODO: rename root dataset to whatever schema works, try variations of level index + self._zarr.create_dataset(level + 1, data=resized_data) + def write( self, path, lossy=True, alpha=True, overwriteAllowed=True, + resample=ResampleMethod.PIL_NEAREST, ): """ Output the current image to a file. @@ -677,7 +726,7 @@ def write( else: raise TileSourceError('Output path exists (%s).' % str(path)) - # TODO: compute half, quarter, etc. resolutions + self._generateDownsampledLevels(resample) self._validateZarr() suffix = Path(path).suffix data_dir = self._tempdir diff --git a/sources/zarr/large_image_source_zarr/resample.py b/sources/zarr/large_image_source_zarr/resample.py new file mode 100644 index 000000000..a931538b7 --- /dev/null +++ b/sources/zarr/large_image_source_zarr/resample.py @@ -0,0 +1,46 @@ +from enum import Enum + +from PIL.Image import Resampling + +# TODO: move this module to large_image/tilesource if the +# implementation does not include any zarr-specific functions +# and type it! if it is moved + + +def resample_mean(data): + print('resample mean', data) + + +def resample_median(data): + print('resample median', data) + + +def resample_mode(data): + print('resample mode', data) + + +def resample_max(data): + print('resample max', data) + + +def resample_min(data): + print('resample min', data) + + +def resample_nearest(data): + print('resample nearest', data) + + +class ResampleMethod(Enum): + MEAN = resample_mean + MEDIAN = resample_median + MODE = resample_mode + MAX = resample_max + MIN = resample_min + NEAREST = resample_nearest + PIL_BICUBIC = Resampling.BICUBIC + PIL_BILINEAR = Resampling.BILINEAR + PIL_BOX = Resampling.BOX + PIL_HAMMING = Resampling.HAMMING + PIL_LANCZOS = Resampling.LANCZOS + PIL_NEAREST = Resampling.NEAREST From 91d0b9934f9d617af06be11bdf73e412fafe53b3 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Tue, 26 Mar 2024 19:56:21 +0000 Subject: [PATCH 02/24] fix: handle bands separately when number of bands > 4 --- .../zarr/large_image_source_zarr/__init__.py | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 59af071b6..49af8ca72 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -671,10 +671,10 @@ def _generateDownsampledLevels(self, resample_method): current_data = current_arrays['root'] current_shape = list(current_data.shape) - for level in range(self.levels): + for level in range(self.levels - 1): current_shape[x] = int(current_shape[x] / 2) current_shape[y] = int(current_shape[y] / 2) - resized_data = np.empty(current_shape) + resized_data = np.empty(current_shape, dtype=self.dtype) if resample_method in [ ResampleMethod.PIL_BICUBIC, @@ -686,18 +686,28 @@ def _generateDownsampledLevels(self, resample_method): ]: frame_dims = current_shape[:-3] frame_shape = current_shape[-3:-1] - # TODO: for multiband (>4) images, split by band axis as well + num_bands = current_shape[-1] + slice_indices = [] # TODO: for single band images, use bilinear? explore this! - for frame_ind in itertools.product(*[range(f) for f in frame_dims]): - img = Image.fromarray(current_data[(*frame_ind,)]) - resized_img = img.resize(frame_shape, resample=resample_method.value) - resized_data[(*frame_ind,)] = np.transpose(np.array(resized_img), (1, 0, 2)) + for frame_index in itertools.product(*[range(f) for f in frame_dims]): + if num_bands > 4: + for band_index in range(num_bands): + img = Image.fromarray(current_data[(*frame_index, ..., band_index)], mode='L') + resized_img = img.resize(frame_shape, resample=resample_method.value) + result = np.array(resized_img).astype(self.dtype) + # result = np.transpose(result) + resized_data[(*frame_index, ..., band_index)] = result + else: + img = Image.fromarray(current_data[(*frame_index,)]) + resized_img = img.resize(frame_shape, resample=resample_method.value) + result = np.array(resized_img).astype(self.dtype) + # result = np.transpose(result, (1, 0, 2)) + resized_data[(*frame_index,)] = result + else: # TODO: Implement non-PIL resample methods - print(resample_method) + pass - print(current_shape, resized_data.shape) - # TODO: rename root dataset to whatever schema works, try variations of level index self._zarr.create_dataset(level + 1, data=resized_data) def write( From 152b80bec5f06be6105a99bd62c800acf3d8e606 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Tue, 26 Mar 2024 19:57:11 +0000 Subject: [PATCH 03/24] test: add basic tests for downsampling --- test/test_sink.py | 69 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/test/test_sink.py b/test/test_sink.py index f6bccf26c..fc4412cf5 100644 --- a/test/test_sink.py +++ b/test/test_sink.py @@ -3,6 +3,7 @@ import large_image_source_zarr import numpy as np import pytest +from pathlib import Path # remove this import later import large_image @@ -17,6 +18,7 @@ # 'svi', # 'svs', ] +RESAMPLE_METHODS = list(large_image.resample.ResampleMethod) def copyFromSource(source, sink): @@ -122,7 +124,6 @@ def testImageCopySmall(file_type, tmp_path): assert metadata.get('bandCount') == 3 assert len(metadata.get('frames')) == 6 - # TODO: fix these failures; unexpected metadata when reading it back sink.write(output_file) if file_type == 'zarr': output_file /= '.zattrs' @@ -164,7 +165,6 @@ def testImageCopySmallMultiband(file_type, tmp_path): assert metadata.get('bandCount') == 7 assert len(metadata.get('frames')) == 6 - # TODO: fix these failures; unexpected metadata when reading it back sink.write(output_file) if file_type == 'zarr': output_file /= '.zattrs' @@ -177,3 +177,68 @@ def testImageCopySmallMultiband(file_type, tmp_path): assert new_metadata.get('levels') == 2 or new_metadata.get('levels') == 3 assert new_metadata.get('bandCount') == 7 assert len(new_metadata.get('frames')) == 6 + + +@pytest.mark.parametrize('resample_method', RESAMPLE_METHODS) +def testImageCopyLargeDownsampling(resample_method, tmp_path): + tmp_path = Path('tmp') # keep results; remove this later + output_file = tmp_path / f'{resample_method}.db' + sink = large_image_source_zarr.new() + source = large_image_source_test.TestTileSource( + fractal=True, + tileWidth=128, + tileHeight=128, + sizeX=2048, + sizeY=4096, + frames="c=2,z=3", + ) + copyFromSource(source, sink) + sink.write(output_file, resample=resample_method) + written = large_image_source_zarr.open(output_file) + written_arrays = dict(written._zarr.arrays()) + + assert len(written_arrays) == written.levels + assert written_arrays.get('root') is not None + assert written_arrays.get('root').shape == (2, 3, 4096, 2048, 3) + assert written_arrays.get('1') is not None + assert written_arrays.get('1').shape == (2, 3, 2048, 1024, 3) + assert written_arrays.get('2') is not None + assert written_arrays.get('2').shape == (2, 3, 1024, 512, 3) + assert written_arrays.get('3') is not None + assert written_arrays.get('3').shape == (2, 3, 512, 256, 3) + + # TODO: Can the content of the downsampled data be compared back to the test source? + + +@pytest.mark.parametrize('resample_method', RESAMPLE_METHODS) +def testImageCopyLargeDownsamplingMultiband(resample_method, tmp_path): + tmp_path = Path('tmp') # keep results; remove this later + output_file = tmp_path / f'{resample_method}_multiband.db' + sink = large_image_source_zarr.new() + bands = ( + 'red=400-12000,green=0-65535,blue=800-4000,' + 'ir1=200-24000,ir2=200-22000,gray=100-10000,other=0-65535' + ) + source = large_image_source_test.TestTileSource( + fractal=True, + tileWidth=128, + tileHeight=128, + sizeX=2048, + sizeY=4096, + frames="c=2,z=3", + bands=bands, + ) + copyFromSource(source, sink) + sink.write(output_file, resample=resample_method) + written = large_image_source_zarr.open(output_file) + written_arrays = dict(written._zarr.arrays()) + + assert len(written_arrays) == written.levels + assert written_arrays.get('root') is not None + assert written_arrays.get('root').shape == (2, 3, 4096, 2048, 7) + assert written_arrays.get('1') is not None + assert written_arrays.get('1').shape == (2, 3, 2048, 1024, 7) + assert written_arrays.get('2') is not None + assert written_arrays.get('2').shape == (2, 3, 1024, 512, 7) + assert written_arrays.get('3') is not None + assert written_arrays.get('3').shape == (2, 3, 512, 256, 7) From 1ea179e5c0232752ecb07f1c5cdfb782676d7e18 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Tue, 26 Mar 2024 20:09:03 +0000 Subject: [PATCH 04/24] fix: update reference to ResampleMethod --- test/test_sink.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_sink.py b/test/test_sink.py index fc4412cf5..a4c434ab1 100644 --- a/test/test_sink.py +++ b/test/test_sink.py @@ -7,6 +7,7 @@ import large_image + TMP_DIR = 'tmp/zarr_sink' FILE_TYPES = [ 'tiff', @@ -18,7 +19,7 @@ # 'svi', # 'svs', ] -RESAMPLE_METHODS = list(large_image.resample.ResampleMethod) +RESAMPLE_METHODS = list(large_image_source_zarr.resample.ResampleMethod) def copyFromSource(source, sink): From 7f3570242e2314dfbc125c5912cd58728442ea72 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Thu, 28 Mar 2024 21:17:53 +0000 Subject: [PATCH 05/24] fix: Use tileIterator and addTile to write downsampled levels --- .../zarr/large_image_source_zarr/__init__.py | 215 +++++++++++------- 1 file changed, 131 insertions(+), 84 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 49af8ca72..2e8fe3dab 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -1,4 +1,3 @@ -import itertools import math import os import shutil @@ -534,6 +533,7 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): # TODO: improve band bookkeeping self._checkEditable() + store_path = str(kwargs.pop('level', 0)) placement = { 'x': x, 'y': y, @@ -565,29 +565,35 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): new_dims = { a: max( - self._dims.get(a, 0), + self._dims.get(store_path, {}).get(a, 0), placement.get(a, 0) + tile.shape[i], ) for a, i in self._axes.items() } + self._dims[store_path] = new_dims placement_slices = tuple([ slice(placement.get(a, 0), placement.get(a, 0) + tile.shape[i], 1) for i, a in enumerate(axes) ]) current_arrays = dict(self._zarr.arrays()) + if store_path == '0': + # if writing to base data, invalidate generated levels + for path in current_arrays: + if path != store_path: + self._zarr_store.rmdir(path) chunking = None - if 'root' not in current_arrays: - root = np.empty(tuple(new_dims.values()), dtype=tile.dtype) + if store_path not in current_arrays: + arr = np.empty(tuple(new_dims.values()), dtype=tile.dtype) chunking = tuple([ self._tileSize if a in ['x', 'y'] else new_dims.get('s') if a == 's' else 1 for a in axes ]) else: - root = current_arrays['root'] - root.resize(*tuple(new_dims.values())) - if root.chunks[-1] != new_dims.get('s'): + arr = current_arrays[store_path] + arr.resize(*tuple(new_dims.values())) + if arr.chunks[-1] != new_dims.get('s'): # rechunk if length of samples axis changes chunking = tuple([ self._tileSize if a in ['x', 'y'] else @@ -596,40 +602,45 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): ]) if mask is not None: - root[placement_slices] = np.where(mask, tile, root[placement_slices]) + arr[placement_slices] = np.where(mask, tile, arr[placement_slices]) else: - root[placement_slices] = tile + arr[placement_slices] = tile if chunking: - self._zarr.create_dataset('root', data=root[:], chunks=chunking, overwrite=True) - - # Edit OME metadata - self._zarr.attrs.update({ - 'multiscales': [{ - 'version': '0.5-dev', - 'axes': [{ - 'name': a, - 'type': 'space' if a in ['x', 'y'] else 'other', - } for a in axes], - 'datasets': [{'path': 0}], - }], - 'omero': {'version': '0.5-dev'}, - }) - - # Edit large_image attributes - self._dims = new_dims - self._dtype = tile.dtype - self._bandCount = new_dims.get(axes[-1]) # last axis is assumed to be bands - self.sizeX = new_dims.get('x') - self.sizeY = new_dims.get('y') - self._framecount = np.prod([ - length - for axis, length in new_dims.items() - if axis in axes[:-3] - ]) - self._cacheValue = str(uuid.uuid4()) - self._levels = None - self.levels = int(max(1, math.ceil(math.log(max( - self.sizeX / self.tileWidth, self.sizeY / self.tileHeight)) / math.log(2)) + 1)) + zarr.array( + arr, + chunks=chunking, + overwrite=True, + store=self._zarr_store, + path=store_path, + ) + + # If base data changed, update large_image attributes and OME metadata + if store_path == '0': + self._zarr.attrs.update({ + 'multiscales': [{ + 'version': '0.5-dev', + 'axes': [{ + 'name': a, + 'type': 'space' if a in ['x', 'y'] else 'other', + } for a in axes], + 'datasets': [{'path': 0}], + }], + 'omero': {'version': '0.5-dev'}, + }) + + self._dtype = tile.dtype + self._bandCount = new_dims.get(axes[-1]) # last axis is assumed to be bands + self.sizeX = new_dims.get('x') + self.sizeY = new_dims.get('y') + self._framecount = np.prod([ + length + for axis, length in new_dims.items() + if axis in axes[:-3] + ]) + self._cacheValue = str(uuid.uuid4()) + self._levels = None + self.levels = int(max(1, math.ceil(math.log(max( + self.sizeX / self.tileWidth, self.sizeY / self.tileHeight)) / math.log(2)) + 1)) @property def crop(self): @@ -656,59 +667,96 @@ def crop(self, value): raise TileSourceError(msg) self._crop = (x, y, w, h) + def _downsampleTile(self, tile, resample_method): + new_shape = { + 'height': int(tile.shape[0] / 2), + 'width': int(tile.shape[1] / 2), + 'bands': 1, + } + if len(tile.shape) > 2: + new_shape['bands'] = tile.shape[-1] + if new_shape['bands'] > 4: + result = np.empty( + (new_shape['height'], new_shape['width'], new_shape['bands']), + dtype=self.dtype, + ) + for band_index in range(new_shape['bands']): + selection = tile[(..., band_index)] + print(selection.shape, band_index) + # use mode I;16 to support 16-bit integers + img = Image.fromarray(selection, mode='I;16') + # TODO: Only NEAREST works for 16 bit images + resized_img = img.resize( + (new_shape['width'], new_shape['height']), + resample=resample_method.value, + ) + result[(..., band_index)] = np.array(resized_img).astype(self.dtype) + return result + else: + img = Image.fromarray(tile) + resized_img = img.resize( + (new_shape['width'], new_shape['height']), + resample=resample_method.value, + ) + result = np.array(resized_img).astype(self.dtype) + return result + def _generateDownsampledLevels(self, resample_method): self._checkEditable() current_arrays = dict(self._zarr.arrays()) - if 'root' not in current_arrays: + if '0' not in current_arrays: msg = 'No root data found, cannot generate lower resolution levels.' raise TileSourceError(msg) if 'x' not in self._axes or 'y' not in self._axes: msg = 'Data must have an X axis and Y axis to generate lower resolution levels.' raise TileSourceError(msg) - x = self._axes.get('x') - y = self._axes.get('y') - - current_data = current_arrays['root'] - current_shape = list(current_data.shape) - for level in range(self.levels - 1): - current_shape[x] = int(current_shape[x] / 2) - current_shape[y] = int(current_shape[y] / 2) - resized_data = np.empty(current_shape, dtype=self.dtype) - - if resample_method in [ - ResampleMethod.PIL_BICUBIC, - ResampleMethod.PIL_BILINEAR, - ResampleMethod.PIL_BOX, - ResampleMethod.PIL_HAMMING, - ResampleMethod.PIL_LANCZOS, - ResampleMethod.PIL_NEAREST, - ]: - frame_dims = current_shape[:-3] - frame_shape = current_shape[-3:-1] - num_bands = current_shape[-1] - slice_indices = [] - # TODO: for single band images, use bilinear? explore this! - for frame_index in itertools.product(*[range(f) for f in frame_dims]): - if num_bands > 4: - for band_index in range(num_bands): - img = Image.fromarray(current_data[(*frame_index, ..., band_index)], mode='L') - resized_img = img.resize(frame_shape, resample=resample_method.value) - result = np.array(resized_img).astype(self.dtype) - # result = np.transpose(result) - resized_data[(*frame_index, ..., band_index)] = result - else: - img = Image.fromarray(current_data[(*frame_index,)]) - resized_img = img.resize(frame_shape, resample=resample_method.value) - result = np.array(resized_img).astype(self.dtype) - # result = np.transpose(result, (1, 0, 2)) - resized_data[(*frame_index,)] = result - - else: - # TODO: Implement non-PIL resample methods - pass - - self._zarr.create_dataset(level + 1, data=resized_data) + metadata = self.getMetadata() + tile_size = dict(width=4096, height=4096) + for level in range(1, self.levels): + scale_factor = 2 ** level + iterator_output = dict( + maxWidth=self.sizeX // scale_factor, + maxHeight=self.sizeY // scale_factor, + ) + for frame in metadata.get('frames', [{'Index': 0}]): + frame_position = { + k.replace('Index', '').lower(): v + for k, v in frame.items() + if k.replace('Index', '').lower() in self._axes + } + for tile in self.tileIterator( + tile_size=tile_size, + tile_overlap=dict(x=4, y=4), + frame=frame['Index'], + output=iterator_output, + resample=False, # TODO: incorporate resampling in core + ): + new_tile = self._downsampleTile(tile['tile'], resample_method) + overlap = {k: int(v / 2) for k, v in tile['tile_overlap'].items()} + new_tile = new_tile[ + slice(overlap['top'], new_tile.shape[0] - overlap['bottom']), + slice(overlap['left'], new_tile.shape[1] - overlap['right']), + ] + + x = int( + tile['tile_position']['level_x'] * + (tile_size['width'] / 2), + ) + y = int( + tile['tile_position']['level_y'] * + (tile_size['height'] / 2), + ) + + self.addTile( + new_tile, + x=x, + y=y, + **frame_position, + axes=list(self._axes.keys()), + level=level, + ) + self._validateZarr() # refresh self._levels before continuing def write( self, @@ -737,7 +785,6 @@ def write( raise TileSourceError('Output path exists (%s).' % str(path)) self._generateDownsampledLevels(resample) - self._validateZarr() suffix = Path(path).suffix data_dir = self._tempdir data_store = self._zarr_store From e86015d5c7760e83dac37a662ad97a71d2ea99ed Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Thu, 28 Mar 2024 21:21:16 +0000 Subject: [PATCH 06/24] test: Update downsampling tests --- test/test_sink.py | 57 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/test/test_sink.py b/test/test_sink.py index a4c434ab1..1bed4e8cb 100644 --- a/test/test_sink.py +++ b/test/test_sink.py @@ -3,11 +3,9 @@ import large_image_source_zarr import numpy as np import pytest -from pathlib import Path # remove this import later import large_image - TMP_DIR = 'tmp/zarr_sink' FILE_TYPES = [ 'tiff', @@ -68,6 +66,31 @@ def testAddTileWithMask(): assert not (tile1 == cur[:, :, 0]).all() +def testAddTileWithLevel(): + sink = large_image_source_zarr.new() + tile0 = np.random.random((100, 100)) + sink.addTile(tile0, 0, 0) + arrays = dict(sink._zarr.arrays()) + assert arrays.get('0') is not None + assert arrays.get('0').shape == (100, 100, 1) + + tile1 = np.random.random((10, 10)) + sink.addTile(tile1, 0, 0, level=1) + arrays = dict(sink._zarr.arrays()) + assert arrays.get('0') is not None + assert arrays.get('0').shape == (100, 100, 1) + assert arrays.get('1') is not None + assert arrays.get('1').shape == (10, 10, 1) + + tile1 = np.random.random((100, 100)) + sink.addTile(tile1, 0, 100) + arrays = dict(sink._zarr.arrays()) + assert arrays.get('0') is not None + assert arrays.get('0').shape == (200, 100, 1) + # previously written levels should be cleared after changing level 0 data + assert arrays.get('1') is None + + def testExtraAxis(): sink = large_image_source_zarr.new() sink.addTile(np.random.random((256, 256)), 0, 0, z=1) @@ -182,7 +205,6 @@ def testImageCopySmallMultiband(file_type, tmp_path): @pytest.mark.parametrize('resample_method', RESAMPLE_METHODS) def testImageCopyLargeDownsampling(resample_method, tmp_path): - tmp_path = Path('tmp') # keep results; remove this later output_file = tmp_path / f'{resample_method}.db' sink = large_image_source_zarr.new() source = large_image_source_test.TestTileSource( @@ -191,7 +213,7 @@ def testImageCopyLargeDownsampling(resample_method, tmp_path): tileHeight=128, sizeX=2048, sizeY=4096, - frames="c=2,z=3", + frames='c=2,z=3', ) copyFromSource(source, sink) sink.write(output_file, resample=resample_method) @@ -199,8 +221,8 @@ def testImageCopyLargeDownsampling(resample_method, tmp_path): written_arrays = dict(written._zarr.arrays()) assert len(written_arrays) == written.levels - assert written_arrays.get('root') is not None - assert written_arrays.get('root').shape == (2, 3, 4096, 2048, 3) + assert written_arrays.get('0') is not None + assert written_arrays.get('0').shape == (2, 3, 4096, 2048, 3) assert written_arrays.get('1') is not None assert written_arrays.get('1').shape == (2, 3, 2048, 1024, 3) assert written_arrays.get('2') is not None @@ -213,20 +235,21 @@ def testImageCopyLargeDownsampling(resample_method, tmp_path): @pytest.mark.parametrize('resample_method', RESAMPLE_METHODS) def testImageCopyLargeDownsamplingMultiband(resample_method, tmp_path): - tmp_path = Path('tmp') # keep results; remove this later output_file = tmp_path / f'{resample_method}_multiband.db' sink = large_image_source_zarr.new() - bands = ( - 'red=400-12000,green=0-65535,blue=800-4000,' - 'ir1=200-24000,ir2=200-22000,gray=100-10000,other=0-65535' - ) + # TODO: fix 16-bit images with modes other than NEAREST + # bands = ( + # 'red=400-12000,green=0-65535,blue=800-4000,' + # 'ir1=200-24000,ir2=200-22000,gray=100-10000,other=0-65535' + # ) + bands = 'red=0-50,green=50-100,blue=100-250,other=250-255' source = large_image_source_test.TestTileSource( fractal=True, tileWidth=128, tileHeight=128, sizeX=2048, sizeY=4096, - frames="c=2,z=3", + frames='c=2,z=3', bands=bands, ) copyFromSource(source, sink) @@ -235,11 +258,11 @@ def testImageCopyLargeDownsamplingMultiband(resample_method, tmp_path): written_arrays = dict(written._zarr.arrays()) assert len(written_arrays) == written.levels - assert written_arrays.get('root') is not None - assert written_arrays.get('root').shape == (2, 3, 4096, 2048, 7) + assert written_arrays.get('0') is not None + assert written_arrays.get('0').shape == (2, 3, 4096, 2048, 4) assert written_arrays.get('1') is not None - assert written_arrays.get('1').shape == (2, 3, 2048, 1024, 7) + assert written_arrays.get('1').shape == (2, 3, 2048, 1024, 4) assert written_arrays.get('2') is not None - assert written_arrays.get('2').shape == (2, 3, 1024, 512, 7) + assert written_arrays.get('2').shape == (2, 3, 1024, 512, 4) assert written_arrays.get('3') is not None - assert written_arrays.get('3').shape == (2, 3, 512, 256, 7) + assert written_arrays.get('3').shape == (2, 3, 512, 256, 4) From a8bf91afdb10ca61e75e544023d351b9061bc22e Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Mon, 1 Apr 2024 15:08:28 +0000 Subject: [PATCH 07/24] refactor: split `addTile` into two functions to satisfy function complexity check --- .../zarr/large_image_source_zarr/__init__.py | 49 ++++++++++--------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index a5027e557..abc0cecde 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -519,6 +519,31 @@ def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs): return self._outputTile(tile, TILE_FORMAT_NUMPY, x, y, z, pilImageAllowed, numpyAllowed, **kwargs) + def _validateNewTile(self, tile, mask, placement, axes): + if not isinstance(tile, np.ndarray) or axes is None: + axes = 'yxs' + tile, mode = _imageToNumpy(tile) + elif not isinstance(axes, str) and not isinstance(axes, list): + err = 'Invalid type for axes. Must be str or list[str].' + raise ValueError(err) + axes = [x.lower() for x in axes] + if axes[-1] != 's': + axes.append('s') + if mask is not None and len(axes) - 1 == len(mask.shape): + mask = mask[:, :, np.newaxis] + if 'x' not in axes or 'y' not in axes: + err = 'Invalid value for axes. Must contain "y" and "x".' + raise ValueError(err) + for k in placement: + if k not in axes: + axes[0:0] = [k] + while len(tile.shape) < len(axes): + tile = np.expand_dims(tile, axis=0) + while mask is not None and len(mask.shape) < len(axes): + mask = np.expand_dims(mask, axis=0) + + return tile, mask, placement, axes + def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): """ Add a numpy or image tile to the image, expanding the image as needed @@ -546,30 +571,10 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): 'y': y, **kwargs, } - if not isinstance(tile, np.ndarray) or axes is None: - axes = 'yxs' - tile, mode = _imageToNumpy(tile) - elif not isinstance(axes, str) and not isinstance(axes, list): - err = 'Invalid type for axes. Must be str or list[str].' - raise ValueError(err) - axes = [x.lower() for x in axes] - if axes[-1] != 's': - axes.append('s') - if mask is not None and len(axes) - 1 == len(mask.shape): - mask = mask[:, :, np.newaxis] - if 'x' not in axes or 'y' not in axes: - err = 'Invalid value for axes. Must contain "y" and "x".' - raise ValueError(err) - for k in placement: - if k not in axes: - axes[0:0] = [k] + tile, mask, placement, axes = self._validateNewTile(tile, mask, placement, axes) + with self._addLock: self._axes = {k: i for i, k in enumerate(axes)} - while len(tile.shape) < len(axes): - tile = np.expand_dims(tile, axis=0) - while mask is not None and len(mask.shape) < len(axes): - mask = np.expand_dims(mask, axis=0) - new_dims = { a: max( self._dims.get(store_path, {}).get(a, 0), From 441995f3c2aef0a85d82841f082c8421a242eac3 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Tue, 2 Apr 2024 16:57:32 +0000 Subject: [PATCH 08/24] refactor: move typed resampling methods to `large_image.tilesource` module --- large_image/tilesource/resample.py | 117 ++++++++++++++++++ .../zarr/large_image_source_zarr/__init__.py | 45 +------ .../zarr/large_image_source_zarr/resample.py | 46 ------- test/test_sink.py | 7 +- 4 files changed, 125 insertions(+), 90 deletions(-) create mode 100644 large_image/tilesource/resample.py delete mode 100644 sources/zarr/large_image_source_zarr/resample.py diff --git a/large_image/tilesource/resample.py b/large_image/tilesource/resample.py new file mode 100644 index 000000000..d1a5a2412 --- /dev/null +++ b/large_image/tilesource/resample.py @@ -0,0 +1,117 @@ +from enum import Enum +from typing import Dict + +import numpy as np +from PIL import Image + + +class ResampleMethod(Enum): + PIL_NEAREST = Image.Resampling.NEAREST # 0 + PIL_LANCZOS = Image.Resampling.LANCZOS # 1 + PIL_BILINEAR = Image.Resampling.BILINEAR # 2 + PIL_BICUBIC = Image.Resampling.BICUBIC # 3 + PIL_BOX = Image.Resampling.BOX # 4 + PIL_HAMMING = Image.Resampling.HAMMING # 5 + PIL_MAX_ENUM = 5 + NP_MEAN = 6 + NP_MEDIAN = 7 + NP_MODE = 8 + NP_MAX = 9 + NP_MIN = 10 + NP_NEAREST = 11 + NP_MAX_CROSSBAND = 12 + NP_MIN_CROSSBAND = 13 + + +def pilResize( + tile: np.ndarray, + new_shape: Dict, + resample_method: ResampleMethod, +) -> np.ndarray: + # Only NEAREST works for 16 bit images + img = Image.fromarray(tile) + resized_img = img.resize( + (new_shape['width'], new_shape['height']), + resample=resample_method.value, + ) + result = np.array(resized_img).astype(tile.dtype) + return result + + +def numpyResize( + tile: np.ndarray, + new_shape: Dict, + resample_method: ResampleMethod, +) -> np.ndarray: + if resample_method == ResampleMethod.NP_NEAREST: + return tile[::2, ::2] + else: + subarrays = np.asarray( + [ + tile[0::2, 0::2], + tile[1::2, 0::2], + tile[0::2, 1::2], + tile[1::2, 1::2], + ], + ) + if resample_method == ResampleMethod.NP_MEAN: + return np.mean(subarrays, axis=0).astype(tile.dtype) + elif resample_method == ResampleMethod.NP_MEDIAN: + return np.median(subarrays, axis=0).astype(tile.dtype) + elif resample_method == ResampleMethod.NP_MODE: + result_shape = subarrays[0].shape + result = np.empty(result_shape) + subarrays = subarrays.transpose(1, 2, 0, 3) + for y in range(result_shape[0]): + for x in range(result_shape[1]): + vals, counts = np.unique(subarrays[y, x], axis=0, return_counts=True) + mode = vals[np.argmax(counts)] + result[y, x] = mode + return result + elif resample_method == ResampleMethod.NP_MAX: + summed = np.sum(subarrays, axis=3) + indexes = np.argmax(summed, axis=0) + indexes = np.repeat(indexes[:, :, np.newaxis], tile.shape[2], axis=2) + return np.choose(indexes, subarrays).astype(tile.dtype) + elif resample_method == ResampleMethod.NP_MIN: + summed = np.sum(subarrays, axis=3) + indexes = np.argmin(summed, axis=0) + indexes = np.repeat(indexes[:, :, np.newaxis], tile.shape[2], axis=2) + return np.choose(indexes, subarrays).astype(tile.dtype) + elif resample_method == ResampleMethod.NP_MAX_CROSSBAND: + return np.max(subarrays, axis=0).astype(tile.dtype) + elif resample_method == ResampleMethod.NP_MIN_CROSSBAND: + return np.min(subarrays, axis=0).astype(tile.dtype) + + +def downsampleTileHalfRes( + tile: np.ndarray, + resample_method: ResampleMethod, +) -> np.ndarray: + + resize_function = ( + pilResize + if resample_method.value <= ResampleMethod.PIL_MAX_ENUM.value + else numpyResize + ) + new_shape = { + 'height': int(tile.shape[0] / 2), + 'width': int(tile.shape[1] / 2), + 'bands': 1, + } + if len(tile.shape) > 2: + new_shape['bands'] = tile.shape[-1] + if new_shape['bands'] > 4: + result = np.empty( + (new_shape['height'], new_shape['width'], new_shape['bands']), + dtype=tile.dtype, + ) + for band_index in range(new_shape['bands']): + result[(..., band_index)] = resize_function( + tile[(..., band_index)], + new_shape, + resample_method, + ) + return result + else: + return resize_function(tile, new_shape, resample_method) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index abc0cecde..29be55916 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -11,17 +11,15 @@ import numpy as np import packaging.version import zarr -from PIL import Image import large_image from large_image.cache_util import LruCacheMetaclass, methodcache from large_image.constants import NEW_IMAGE_PATH_FLAG, TILE_FORMAT_NUMPY, SourcePriority from large_image.exceptions import TileSourceError, TileSourceFileNotFoundError from large_image.tilesource import FileTileSource +from large_image.tilesource.resample import ResampleMethod, downsampleTileHalfRes from large_image.tilesource.utilities import _imageToNumpy, nearPowerOfTwo -from .resample import ResampleMethod - try: __version__ = _importlib_version(__name__) except PackageNotFoundError: @@ -679,40 +677,6 @@ def crop(self, value): raise TileSourceError(msg) self._crop = (x, y, w, h) - def _downsampleTile(self, tile, resample_method): - new_shape = { - 'height': int(tile.shape[0] / 2), - 'width': int(tile.shape[1] / 2), - 'bands': 1, - } - if len(tile.shape) > 2: - new_shape['bands'] = tile.shape[-1] - if new_shape['bands'] > 4: - result = np.empty( - (new_shape['height'], new_shape['width'], new_shape['bands']), - dtype=self.dtype, - ) - for band_index in range(new_shape['bands']): - selection = tile[(..., band_index)] - print(selection.shape, band_index) - # use mode I;16 to support 16-bit integers - img = Image.fromarray(selection, mode='I;16') - # TODO: Only NEAREST works for 16 bit images - resized_img = img.resize( - (new_shape['width'], new_shape['height']), - resample=resample_method.value, - ) - result[(..., band_index)] = np.array(resized_img).astype(self.dtype) - return result - else: - img = Image.fromarray(tile) - resized_img = img.resize( - (new_shape['width'], new_shape['height']), - resample=resample_method.value, - ) - result = np.array(resized_img).astype(self.dtype) - return result - def _generateDownsampledLevels(self, resample_method): self._checkEditable() current_arrays = dict(self._zarr.arrays()) @@ -744,7 +708,7 @@ def _generateDownsampledLevels(self, resample_method): output=iterator_output, resample=False, # TODO: incorporate resampling in core ): - new_tile = self._downsampleTile(tile['tile'], resample_method) + new_tile = downsampleTileHalfRes(tile['tile'], resample_method) overlap = {k: int(v / 2) for k, v in tile['tile_overlap'].items()} new_tile = new_tile[ slice(overlap['top'], new_tile.shape[0] - overlap['bottom']), @@ -776,7 +740,7 @@ def write( lossy=True, alpha=True, overwriteAllowed=True, - resample=ResampleMethod.PIL_NEAREST, + resample=None, ): """ Output the current image to a file. @@ -796,6 +760,9 @@ def write( else: raise TileSourceError('Output path exists (%s).' % str(path)) + if resample is None: + resample = ResampleMethod.NP_NEAREST if not lossy else ResampleMethod.PIL_LANCZOS + self._generateDownsampledLevels(resample) suffix = Path(path).suffix data_dir = self._tempdir diff --git a/sources/zarr/large_image_source_zarr/resample.py b/sources/zarr/large_image_source_zarr/resample.py deleted file mode 100644 index a931538b7..000000000 --- a/sources/zarr/large_image_source_zarr/resample.py +++ /dev/null @@ -1,46 +0,0 @@ -from enum import Enum - -from PIL.Image import Resampling - -# TODO: move this module to large_image/tilesource if the -# implementation does not include any zarr-specific functions -# and type it! if it is moved - - -def resample_mean(data): - print('resample mean', data) - - -def resample_median(data): - print('resample median', data) - - -def resample_mode(data): - print('resample mode', data) - - -def resample_max(data): - print('resample max', data) - - -def resample_min(data): - print('resample min', data) - - -def resample_nearest(data): - print('resample nearest', data) - - -class ResampleMethod(Enum): - MEAN = resample_mean - MEDIAN = resample_median - MODE = resample_mode - MAX = resample_max - MIN = resample_min - NEAREST = resample_nearest - PIL_BICUBIC = Resampling.BICUBIC - PIL_BILINEAR = Resampling.BILINEAR - PIL_BOX = Resampling.BOX - PIL_HAMMING = Resampling.HAMMING - PIL_LANCZOS = Resampling.LANCZOS - PIL_NEAREST = Resampling.NEAREST diff --git a/test/test_sink.py b/test/test_sink.py index 1bed4e8cb..ec063e04a 100644 --- a/test/test_sink.py +++ b/test/test_sink.py @@ -1,10 +1,10 @@ - import large_image_source_test import large_image_source_zarr import numpy as np import pytest import large_image +from large_image.tilesource.resample import ResampleMethod TMP_DIR = 'tmp/zarr_sink' FILE_TYPES = [ @@ -13,11 +13,8 @@ 'db', 'zip', 'zarr', - # "dz", - # 'svi', - # 'svs', ] -RESAMPLE_METHODS = list(large_image_source_zarr.resample.ResampleMethod) +RESAMPLE_METHODS = list(ResampleMethod) def copyFromSource(source, sink): From 42aeffe838790f2bcc1c05cdee8fd81f4ae1ca10 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 3 Apr 2024 14:03:12 +0000 Subject: [PATCH 09/24] fix: simplify NP_MODE resize implementation --- large_image/tilesource/resample.py | 39 ++++++++++++++++++------------ 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/large_image/tilesource/resample.py b/large_image/tilesource/resample.py index d1a5a2412..32adc5541 100644 --- a/large_image/tilesource/resample.py +++ b/large_image/tilesource/resample.py @@ -46,6 +46,7 @@ def numpyResize( if resample_method == ResampleMethod.NP_NEAREST: return tile[::2, ::2] else: + pixel_selection = None subarrays = np.asarray( [ tile[0::2, 0::2], @@ -54,35 +55,43 @@ def numpyResize( tile[1::2, 1::2], ], ) + if resample_method == ResampleMethod.NP_MEAN: return np.mean(subarrays, axis=0).astype(tile.dtype) elif resample_method == ResampleMethod.NP_MEDIAN: return np.median(subarrays, axis=0).astype(tile.dtype) elif resample_method == ResampleMethod.NP_MODE: - result_shape = subarrays[0].shape - result = np.empty(result_shape) - subarrays = subarrays.transpose(1, 2, 0, 3) - for y in range(result_shape[0]): - for x in range(result_shape[1]): - vals, counts = np.unique(subarrays[y, x], axis=0, return_counts=True) - mode = vals[np.argmax(counts)] - result[y, x] = mode - return result + # if a pixel occurs twice in a set of four, it is a mode + # if no mode, default to pixel 0. check for minimal matches 1=2, 1=3, 2=3 + pixel_selection = np.where( + ( + (subarrays[1] == subarrays[2]).all(axis=2) | + (subarrays[1] == subarrays[3]).all(axis=2) + ), + 1, np.where( + (subarrays[2] == subarrays[3]).all(axis=2), + 2, 0, + ), + ) elif resample_method == ResampleMethod.NP_MAX: summed = np.sum(subarrays, axis=3) - indexes = np.argmax(summed, axis=0) - indexes = np.repeat(indexes[:, :, np.newaxis], tile.shape[2], axis=2) - return np.choose(indexes, subarrays).astype(tile.dtype) + pixel_selection = np.argmax(summed, axis=0) elif resample_method == ResampleMethod.NP_MIN: summed = np.sum(subarrays, axis=3) - indexes = np.argmin(summed, axis=0) - indexes = np.repeat(indexes[:, :, np.newaxis], tile.shape[2], axis=2) - return np.choose(indexes, subarrays).astype(tile.dtype) + pixel_selection = np.argmin(summed, axis=0) elif resample_method == ResampleMethod.NP_MAX_CROSSBAND: return np.max(subarrays, axis=0).astype(tile.dtype) elif resample_method == ResampleMethod.NP_MIN_CROSSBAND: return np.min(subarrays, axis=0).astype(tile.dtype) + if pixel_selection is not None: + pixel_selection = np.expand_dims(pixel_selection, axis=2) + pixel_selection = np.repeat(pixel_selection, tile.shape[2], axis=2) + return np.choose(pixel_selection, subarrays).astype(tile.dtype) + else: + msg = f'Unknown resample method {resample_method}.' + raise ValueError(msg) + def downsampleTileHalfRes( tile: np.ndarray, From 49a46d778a5c830db3ef70bd081277e88b8d63c9 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 3 Apr 2024 14:12:15 +0000 Subject: [PATCH 10/24] fix: eliminate type error for 2d tiles --- large_image/tilesource/resample.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/large_image/tilesource/resample.py b/large_image/tilesource/resample.py index 32adc5541..ad5f4187a 100644 --- a/large_image/tilesource/resample.py +++ b/large_image/tilesource/resample.py @@ -85,8 +85,9 @@ def numpyResize( return np.min(subarrays, axis=0).astype(tile.dtype) if pixel_selection is not None: - pixel_selection = np.expand_dims(pixel_selection, axis=2) - pixel_selection = np.repeat(pixel_selection, tile.shape[2], axis=2) + if len(tile.shape) > 2: + pixel_selection = np.expand_dims(pixel_selection, axis=2) + pixel_selection = np.repeat(pixel_selection, tile.shape[2], axis=2) return np.choose(pixel_selection, subarrays).astype(tile.dtype) else: msg = f'Unknown resample method {resample_method}.' From e769fe00cbd96e2dc2625815b27817bd96c7d10f Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 3 Apr 2024 15:57:22 +0000 Subject: [PATCH 11/24] fix: only default to LANCZOS if dtype is uint8 --- sources/zarr/large_image_source_zarr/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 29be55916..a55d12014 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -761,7 +761,11 @@ def write( raise TileSourceError('Output path exists (%s).' % str(path)) if resample is None: - resample = ResampleMethod.NP_NEAREST if not lossy else ResampleMethod.PIL_LANCZOS + resample = ( + ResampleMethod.PIL_LANCZOS + if lossy and self.dtype == np.uint8 + else ResampleMethod.NP_NEAREST + ) self._generateDownsampledLevels(resample) suffix = Path(path).suffix From 94d517d71840daf0052cb20e1c01e50a8f336ad4 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 3 Apr 2024 16:20:42 +0000 Subject: [PATCH 12/24] fix: don't split bands when using numpy resample methods --- large_image/tilesource/resample.py | 38 +++++++++++++----------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/large_image/tilesource/resample.py b/large_image/tilesource/resample.py index ad5f4187a..0b3eca114 100644 --- a/large_image/tilesource/resample.py +++ b/large_image/tilesource/resample.py @@ -85,9 +85,8 @@ def numpyResize( return np.min(subarrays, axis=0).astype(tile.dtype) if pixel_selection is not None: - if len(tile.shape) > 2: - pixel_selection = np.expand_dims(pixel_selection, axis=2) - pixel_selection = np.repeat(pixel_selection, tile.shape[2], axis=2) + pixel_selection = np.expand_dims(pixel_selection, axis=2) + pixel_selection = np.repeat(pixel_selection, tile.shape[2], axis=2) return np.choose(pixel_selection, subarrays).astype(tile.dtype) else: msg = f'Unknown resample method {resample_method}.' @@ -98,12 +97,6 @@ def downsampleTileHalfRes( tile: np.ndarray, resample_method: ResampleMethod, ) -> np.ndarray: - - resize_function = ( - pilResize - if resample_method.value <= ResampleMethod.PIL_MAX_ENUM.value - else numpyResize - ) new_shape = { 'height': int(tile.shape[0] / 2), 'width': int(tile.shape[1] / 2), @@ -111,17 +104,20 @@ def downsampleTileHalfRes( } if len(tile.shape) > 2: new_shape['bands'] = tile.shape[-1] - if new_shape['bands'] > 4: - result = np.empty( - (new_shape['height'], new_shape['width'], new_shape['bands']), - dtype=tile.dtype, - ) - for band_index in range(new_shape['bands']): - result[(..., band_index)] = resize_function( - tile[(..., band_index)], - new_shape, - resample_method, + if resample_method.value <= ResampleMethod.PIL_MAX_ENUM.value: + if new_shape['bands'] > 4: + result = np.empty( + (new_shape['height'], new_shape['width'], new_shape['bands']), + dtype=tile.dtype, ) - return result + for band_index in range(new_shape['bands']): + result[(..., band_index)] = pilResize( + tile[(..., band_index)], + new_shape, + resample_method, + ) + return result + else: + return pilResize(tile, new_shape, resample_method) else: - return resize_function(tile, new_shape, resample_method) + return numpyResize(tile, new_shape, resample_method) From d33c89b5eadf53eaeaf46c5845d22c384b844296 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 3 Apr 2024 16:21:50 +0000 Subject: [PATCH 13/24] test: use 6 8-bit bands in downsampling multiband test --- test/test_sink.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/test/test_sink.py b/test/test_sink.py index ec063e04a..e718ff6bb 100644 --- a/test/test_sink.py +++ b/test/test_sink.py @@ -234,12 +234,7 @@ def testImageCopyLargeDownsampling(resample_method, tmp_path): def testImageCopyLargeDownsamplingMultiband(resample_method, tmp_path): output_file = tmp_path / f'{resample_method}_multiband.db' sink = large_image_source_zarr.new() - # TODO: fix 16-bit images with modes other than NEAREST - # bands = ( - # 'red=400-12000,green=0-65535,blue=800-4000,' - # 'ir1=200-24000,ir2=200-22000,gray=100-10000,other=0-65535' - # ) - bands = 'red=0-50,green=50-100,blue=100-250,other=250-255' + bands = 'red=0-255,green=0-255,blue=0-255,ir=0-255,gray=0-255,other=0-255' source = large_image_source_test.TestTileSource( fractal=True, tileWidth=128, @@ -256,10 +251,10 @@ def testImageCopyLargeDownsamplingMultiband(resample_method, tmp_path): assert len(written_arrays) == written.levels assert written_arrays.get('0') is not None - assert written_arrays.get('0').shape == (2, 3, 4096, 2048, 4) + assert written_arrays.get('0').shape == (2, 3, 4096, 2048, 6) assert written_arrays.get('1') is not None - assert written_arrays.get('1').shape == (2, 3, 2048, 1024, 4) + assert written_arrays.get('1').shape == (2, 3, 2048, 1024, 6) assert written_arrays.get('2') is not None - assert written_arrays.get('2').shape == (2, 3, 1024, 512, 4) + assert written_arrays.get('2').shape == (2, 3, 1024, 512, 6) assert written_arrays.get('3') is not None - assert written_arrays.get('3').shape == (2, 3, 512, 256, 4) + assert written_arrays.get('3').shape == (2, 3, 512, 256, 6) From cbd48dacb5cacb3706b7b40b82082b2dbc1354f9 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 3 Apr 2024 16:30:26 +0000 Subject: [PATCH 14/24] fix: put back tile shape check for typing --- large_image/tilesource/resample.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/large_image/tilesource/resample.py b/large_image/tilesource/resample.py index 0b3eca114..e82379812 100644 --- a/large_image/tilesource/resample.py +++ b/large_image/tilesource/resample.py @@ -85,8 +85,9 @@ def numpyResize( return np.min(subarrays, axis=0).astype(tile.dtype) if pixel_selection is not None: - pixel_selection = np.expand_dims(pixel_selection, axis=2) - pixel_selection = np.repeat(pixel_selection, tile.shape[2], axis=2) + if len(tile.shape) > 2: + pixel_selection = np.expand_dims(pixel_selection, axis=2) + pixel_selection = np.repeat(pixel_selection, tile.shape[2], axis=2) return np.choose(pixel_selection, subarrays).astype(tile.dtype) else: msg = f'Unknown resample method {resample_method}.' From a47257632de2cba4190098fbe5b54150ac4f0ac4 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 3 Apr 2024 19:22:47 +0000 Subject: [PATCH 15/24] fix: round up tile dimensions when dividing by 2 --- large_image/tilesource/resample.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/large_image/tilesource/resample.py b/large_image/tilesource/resample.py index e82379812..26b83cd92 100644 --- a/large_image/tilesource/resample.py +++ b/large_image/tilesource/resample.py @@ -99,8 +99,8 @@ def downsampleTileHalfRes( resample_method: ResampleMethod, ) -> np.ndarray: new_shape = { - 'height': int(tile.shape[0] / 2), - 'width': int(tile.shape[1] / 2), + 'height': (tile.shape[0] + 1) // 2, + 'width': (tile.shape[1] + 1) // 2, 'bands': 1, } if len(tile.shape) > 2: From 9a74c0ac85f7c6394c25d0a29e395f0c371f337f Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 3 Apr 2024 19:42:22 +0000 Subject: [PATCH 16/24] fix: only overlap tiles when using interpolated resample modes --- .../zarr/large_image_source_zarr/__init__.py | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index a55d12014..1922875bb 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -688,7 +688,18 @@ def _generateDownsampledLevels(self, resample_method): raise TileSourceError(msg) metadata = self.getMetadata() - tile_size = dict(width=4096, height=4096) + + if ( + resample_method.value < ResampleMethod.PIL_MAX_ENUM.value and + resample_method != ResampleMethod.PIL_NEAREST + ): + tile_overlap = dict(x=4, y=4, edges=True) + else: + tile_overlap = dict(x=0, y=0) + tile_size = dict( + width=4096 + tile_overlap['x'], + height=4096 + tile_overlap['y'], + ) for level in range(1, self.levels): scale_factor = 2 ** level iterator_output = dict( @@ -703,7 +714,7 @@ def _generateDownsampledLevels(self, resample_method): } for tile in self.tileIterator( tile_size=tile_size, - tile_overlap=dict(x=4, y=4), + tile_overlap=tile_overlap, frame=frame['Index'], output=iterator_output, resample=False, # TODO: incorporate resampling in core @@ -715,14 +726,8 @@ def _generateDownsampledLevels(self, resample_method): slice(overlap['left'], new_tile.shape[1] - overlap['right']), ] - x = int( - tile['tile_position']['level_x'] * - (tile_size['width'] / 2), - ) - y = int( - tile['tile_position']['level_y'] * - (tile_size['height'] / 2), - ) + x = int((tile['x'] + overlap['left']) / 2) + y = int((tile['y'] + overlap['top']) / 2) self.addTile( new_tile, From e7234b2e1a6dc310959c1fbaef8ee8d91a9377ee Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 3 Apr 2024 20:17:02 +0000 Subject: [PATCH 17/24] fix: swap order of operations computing tile start coords using overlap --- sources/zarr/large_image_source_zarr/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 1922875bb..e6395d62b 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -726,8 +726,8 @@ def _generateDownsampledLevels(self, resample_method): slice(overlap['left'], new_tile.shape[1] - overlap['right']), ] - x = int((tile['x'] + overlap['left']) / 2) - y = int((tile['y'] + overlap['top']) / 2) + x = int(tile['x'] / 2 + overlap['left']) + y = int(tile['y'] / 2 + overlap['top']) self.addTile( new_tile, From dbcb661c038b87058b7d2975d5952b43b38b46d3 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 3 Apr 2024 21:12:20 +0000 Subject: [PATCH 18/24] fix: copy last row/col when tile has odd dims --- large_image/tilesource/resample.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/large_image/tilesource/resample.py b/large_image/tilesource/resample.py index 26b83cd92..09ff55b6c 100644 --- a/large_image/tilesource/resample.py +++ b/large_image/tilesource/resample.py @@ -46,6 +46,11 @@ def numpyResize( if resample_method == ResampleMethod.NP_NEAREST: return tile[::2, ::2] else: + if tile.shape[0] % 2 != 0: + tile = np.append(tile, np.expand_dims(tile[-1], axis=0), axis=0) + if tile.shape[1] % 2 != 0: + tile = np.append(tile, np.expand_dims(tile[:, -1], axis=1), axis=1) + pixel_selection = None subarrays = np.asarray( [ From ff67307955fbf421b3ab44357dfb5dc1721e25f8 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 3 Apr 2024 21:22:26 +0000 Subject: [PATCH 19/24] refactor: rename min/max methods --- large_image/tilesource/resample.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/large_image/tilesource/resample.py b/large_image/tilesource/resample.py index 09ff55b6c..a36b4084f 100644 --- a/large_image/tilesource/resample.py +++ b/large_image/tilesource/resample.py @@ -19,8 +19,8 @@ class ResampleMethod(Enum): NP_MAX = 9 NP_MIN = 10 NP_NEAREST = 11 - NP_MAX_CROSSBAND = 12 - NP_MIN_CROSSBAND = 13 + NP_MAX_COLOR = 12 + NP_MIN_COLOR = 13 def pilResize( @@ -65,6 +65,16 @@ def numpyResize( return np.mean(subarrays, axis=0).astype(tile.dtype) elif resample_method == ResampleMethod.NP_MEDIAN: return np.median(subarrays, axis=0).astype(tile.dtype) + elif resample_method == ResampleMethod.NP_MAX: + return np.max(subarrays, axis=0).astype(tile.dtype) + elif resample_method == ResampleMethod.NP_MIN: + return np.min(subarrays, axis=0).astype(tile.dtype) + elif resample_method == ResampleMethod.NP_MAX_COLOR: + summed = np.sum(subarrays, axis=3) + pixel_selection = np.argmax(summed, axis=0) + elif resample_method == ResampleMethod.NP_MIN_COLOR: + summed = np.sum(subarrays, axis=3) + pixel_selection = np.argmin(summed, axis=0) elif resample_method == ResampleMethod.NP_MODE: # if a pixel occurs twice in a set of four, it is a mode # if no mode, default to pixel 0. check for minimal matches 1=2, 1=3, 2=3 @@ -78,16 +88,6 @@ def numpyResize( 2, 0, ), ) - elif resample_method == ResampleMethod.NP_MAX: - summed = np.sum(subarrays, axis=3) - pixel_selection = np.argmax(summed, axis=0) - elif resample_method == ResampleMethod.NP_MIN: - summed = np.sum(subarrays, axis=3) - pixel_selection = np.argmin(summed, axis=0) - elif resample_method == ResampleMethod.NP_MAX_CROSSBAND: - return np.max(subarrays, axis=0).astype(tile.dtype) - elif resample_method == ResampleMethod.NP_MIN_CROSSBAND: - return np.min(subarrays, axis=0).astype(tile.dtype) if pixel_selection is not None: if len(tile.shape) > 2: From 815c87e914c2b0242d390014bb70b14a7f501cac Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Thu, 4 Apr 2024 12:18:21 +0000 Subject: [PATCH 20/24] style: remove extra whitespace --- large_image/tilesource/resample.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/large_image/tilesource/resample.py b/large_image/tilesource/resample.py index a36b4084f..1d374f272 100644 --- a/large_image/tilesource/resample.py +++ b/large_image/tilesource/resample.py @@ -49,7 +49,7 @@ def numpyResize( if tile.shape[0] % 2 != 0: tile = np.append(tile, np.expand_dims(tile[-1], axis=0), axis=0) if tile.shape[1] % 2 != 0: - tile = np.append(tile, np.expand_dims(tile[:, -1], axis=1), axis=1) + tile = np.append(tile, np.expand_dims(tile[:, -1], axis=1), axis=1) pixel_selection = None subarrays = np.asarray( From 4ef0c584f47119f2e7c4f350255a8361430b82a0 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Thu, 4 Apr 2024 14:53:27 +0000 Subject: [PATCH 21/24] fix: update crop method and crop before generating downsampled levels --- .../zarr/large_image_source_zarr/__init__.py | 85 +++++++++---------- test/test_sink.py | 33 +++++++ 2 files changed, 75 insertions(+), 43 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index e6395d62b..0d58bcc54 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -765,59 +765,58 @@ def write( else: raise TileSourceError('Output path exists (%s).' % str(path)) - if resample is None: - resample = ( - ResampleMethod.PIL_LANCZOS - if lossy and self.dtype == np.uint8 - else ResampleMethod.NP_NEAREST - ) - - self._generateDownsampledLevels(resample) suffix = Path(path).suffix - data_dir = self._tempdir - data_store = self._zarr_store + source = self if self.crop: - x, y, w, h = self.crop - current_arrays = dict(self._zarr.arrays()) - # create new temp storage for cropped data - data_dir = tempfile.TemporaryDirectory() - data_store = zarr.DirectoryStore(data_dir.name) - cropped_zarr = zarr.open(data_store, mode='w') - for arr_name in current_arrays: - arr = np.array(current_arrays[arr_name]) - cropped_arr = arr.take( - indices=range(x, x + w), - axis=self._axes.get('x'), - ).take( - indices=range(y, y + h), - axis=self._axes.get('y'), - ) - cropped_zarr.create_dataset(arr_name, data=cropped_arr, overwrite=True) - cropped_zarr.attrs.update(self._zarr.attrs) - - if suffix == '.zarr': - shutil.copytree(data_dir.name, path) - - elif suffix in ['.db', '.sqlite']: - sqlite_store = zarr.SQLiteStore(path) - zarr.copy_store(data_store, sqlite_store, if_exists='replace') - sqlite_store.close() + top, left, height, width = self.crop + source = new() + source._zarr.attrs.update(self._zarr.attrs) + for frame in self.getMetadata().get('frames', [{'Index': 0}]): + frame_position = { + k.replace('Index', '').lower(): v + for k, v in frame.items() + if k.replace('Index', '').lower() in self._axes + } + for tile in self.tileIterator( + frame=frame['Index'], + region=dict(top=top, left=left, width=width, height=height), + resample=False, + ): + source.addTile( + tile['tile'], + x=tile['x'] - left, + y=tile['y'] - top, + axes=list(self._axes.keys()), + **frame_position, + ) - elif suffix == '.zip': - zip_store = zarr.ZipStore(path) - zarr.copy_store(data_store, zip_store, if_exists='replace') - zip_store.close() + if suffix in ['.zarr', '.db', '.sqlite', '.zip']: + if resample is None: + resample = ( + ResampleMethod.PIL_LANCZOS + if lossy and source.dtype == np.uint8 + else ResampleMethod.NP_NEAREST + ) + source._generateDownsampledLevels(resample) + + if suffix == '.zarr': + shutil.copytree(source._tempdir.name, path) + elif suffix in ['.db', '.sqlite']: + sqlite_store = zarr.SQLiteStore(path) + zarr.copy_store(source._zarr_store, sqlite_store, if_exists='replace') + sqlite_store.close() + elif suffix == '.zip': + zip_store = zarr.ZipStore(path) + zarr.copy_store(source._zarr_store, zip_store, if_exists='replace') + zip_store.close() else: from large_image_converter import convert - attrs_path = Path(data_dir.name) / '.zattrs' + attrs_path = Path(source._tempdir.name) / '.zattrs' convert(str(attrs_path), path, overwrite=overwriteAllowed) - if self.crop: - shutil.rmtree(data_dir.name) - def open(*args, **kwargs): """ diff --git a/test/test_sink.py b/test/test_sink.py index e718ff6bb..de5bbf1a1 100644 --- a/test/test_sink.py +++ b/test/test_sink.py @@ -258,3 +258,36 @@ def testImageCopyLargeDownsamplingMultiband(resample_method, tmp_path): assert written_arrays.get('2').shape == (2, 3, 1024, 512, 6) assert written_arrays.get('3') is not None assert written_arrays.get('3').shape == (2, 3, 512, 256, 6) + + +@pytest.mark.parametrize('resample_method', RESAMPLE_METHODS) +def testCropAndDownsample(resample_method, tmp_path): + output_file = tmp_path / f'cropped_{resample_method}.db' + sink = large_image_source_zarr.new() + + # add tiles with some overlap to multiple frames + num_frames = 4 + num_bands = 5 + for z in range(num_frames): + sink.addTile(np.random.random((1000, 1000, num_bands)), 0, 0, z=z) + sink.addTile(np.random.random((1000, 1000, num_bands)), 950, 0, z=z) + sink.addTile(np.random.random((1000, 1000, num_bands)), 0, 900, z=z) + sink.addTile(np.random.random((1000, 1000, num_bands)), 950, 900, z=z) + + current_arrays = dict(sink._zarr.arrays()) + assert len(current_arrays) == 1 + assert current_arrays.get('0') is not None + assert current_arrays.get('0').shape == (num_frames, 1900, 1950, num_bands) + + sink.crop = (100, 50, 1800, 1825) + sink.write(output_file) + written = large_image_source_zarr.open(output_file) + written_arrays = dict(written._zarr.arrays()) + + assert len(written_arrays) == written.levels + assert written_arrays.get('0') is not None + assert written_arrays.get('0').shape == (num_frames, 1800, 1825, num_bands) + assert written_arrays.get('1') is not None + assert written_arrays.get('1').shape == (num_frames, 900, 913, num_bands) + assert written_arrays.get('2') is not None + assert written_arrays.get('2').shape == (num_frames, 450, 456, num_bands) From f7876a3e4512dc3c2349536a88a5204dca91f13d Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Thu, 4 Apr 2024 16:37:02 +0000 Subject: [PATCH 22/24] test: Reorganize tests and add assertion for downsampled content --- test/test_sink.py | 130 ++++++++++++++++++++++++++-------------------- 1 file changed, 73 insertions(+), 57 deletions(-) diff --git a/test/test_sink.py b/test/test_sink.py index de5bbf1a1..b023dc36a 100644 --- a/test/test_sink.py +++ b/test/test_sink.py @@ -14,7 +14,6 @@ 'zip', 'zarr', ] -RESAMPLE_METHODS = list(ResampleMethod) def copyFromSource(source, sink): @@ -124,7 +123,7 @@ def testCrop(file_type, tmp_path): @pytest.mark.parametrize('file_type', FILE_TYPES) -def testImageCopySmall(file_type, tmp_path): +def testImageCopySmallFileTypes(file_type, tmp_path): output_file = tmp_path / f'test.{file_type}' sink = large_image_source_zarr.new() source = large_image_source_test.TestTileSource( @@ -149,8 +148,8 @@ def testImageCopySmall(file_type, tmp_path): if file_type == 'zarr': output_file /= '.zattrs' written = large_image.open(output_file) - new_metadata = written.metadata + new_metadata = written.getMetadata() assert new_metadata.get('sizeX') == 512 assert new_metadata.get('sizeY') == 1024 assert new_metadata.get('dtype') == 'uint8' @@ -159,13 +158,16 @@ def testImageCopySmall(file_type, tmp_path): assert len(new_metadata.get('frames')) == 6 -@pytest.mark.parametrize('file_type', FILE_TYPES) -def testImageCopySmallMultiband(file_type, tmp_path): - output_file = tmp_path / f'test.{file_type}' +@pytest.mark.parametrize('resample_method', [ + ResampleMethod.PIL_LANCZOS, + ResampleMethod.NP_NEAREST, +]) +def testImageCopySmallMultiband(resample_method, tmp_path): + output_file = tmp_path / f'test_{resample_method}.db' sink = large_image_source_zarr.new() bands = ( - 'red=400-12000,green=0-65535,blue=800-4000,' - 'ir1=200-24000,ir2=200-22000,gray=100-10000,other=0-65535' + 'red=0-255,green=0-255,blue=0-255,' + 'ir1=0-255,ir2=0-255,gray=0-255,other=0-255' ) source = large_image_source_test.TestTileSource( fractal=True, @@ -181,88 +183,102 @@ def testImageCopySmallMultiband(file_type, tmp_path): metadata = sink.getMetadata() assert metadata.get('sizeX') == 512 assert metadata.get('sizeY') == 1024 - assert metadata.get('dtype') == 'uint16' + assert metadata.get('dtype') == 'uint8' assert metadata.get('levels') == 2 assert metadata.get('bandCount') == 7 assert len(metadata.get('frames')) == 6 - sink.write(output_file) - if file_type == 'zarr': - output_file /= '.zattrs' + sink.write(output_file, resample=resample_method) written = large_image.open(output_file) new_metadata = written.getMetadata() assert new_metadata.get('sizeX') == 512 assert new_metadata.get('sizeY') == 1024 - assert new_metadata.get('dtype') == 'uint16' + assert new_metadata.get('dtype') == 'uint8' assert new_metadata.get('levels') == 2 or new_metadata.get('levels') == 3 assert new_metadata.get('bandCount') == 7 assert len(new_metadata.get('frames')) == 6 - -@pytest.mark.parametrize('resample_method', RESAMPLE_METHODS) -def testImageCopyLargeDownsampling(resample_method, tmp_path): - output_file = tmp_path / f'{resample_method}.db' - sink = large_image_source_zarr.new() - source = large_image_source_test.TestTileSource( - fractal=True, - tileWidth=128, - tileHeight=128, - sizeX=2048, - sizeY=4096, - frames='c=2,z=3', - ) - copyFromSource(source, sink) - sink.write(output_file, resample=resample_method) - written = large_image_source_zarr.open(output_file) written_arrays = dict(written._zarr.arrays()) - assert len(written_arrays) == written.levels assert written_arrays.get('0') is not None - assert written_arrays.get('0').shape == (2, 3, 4096, 2048, 3) + assert written_arrays.get('0').shape == (2, 3, 1024, 512, 7) assert written_arrays.get('1') is not None - assert written_arrays.get('1').shape == (2, 3, 2048, 1024, 3) - assert written_arrays.get('2') is not None - assert written_arrays.get('2').shape == (2, 3, 1024, 512, 3) - assert written_arrays.get('3') is not None - assert written_arrays.get('3').shape == (2, 3, 512, 256, 3) + assert written_arrays.get('1').shape == (2, 3, 512, 256, 7) - # TODO: Can the content of the downsampled data be compared back to the test source? - -@pytest.mark.parametrize('resample_method', RESAMPLE_METHODS) -def testImageCopyLargeDownsamplingMultiband(resample_method, tmp_path): - output_file = tmp_path / f'{resample_method}_multiband.db' +@pytest.mark.parametrize('resample_method', list(ResampleMethod)) +def testImageCopySmallDownsampling(resample_method, tmp_path): + output_file = tmp_path / f'test_{resample_method}.db' sink = large_image_source_zarr.new() - bands = 'red=0-255,green=0-255,blue=0-255,ir=0-255,gray=0-255,other=0-255' source = large_image_source_test.TestTileSource( fractal=True, tileWidth=128, tileHeight=128, - sizeX=2048, - sizeY=4096, + sizeX=512, + sizeY=1024, frames='c=2,z=3', - bands=bands, ) copyFromSource(source, sink) + sink.write(output_file, resample=resample_method) - written = large_image_source_zarr.open(output_file) - written_arrays = dict(written._zarr.arrays()) + written = large_image.open(output_file) + written_arrays = dict(written._zarr.arrays()) assert len(written_arrays) == written.levels assert written_arrays.get('0') is not None - assert written_arrays.get('0').shape == (2, 3, 4096, 2048, 6) + assert written_arrays.get('0').shape == (2, 3, 1024, 512, 3) assert written_arrays.get('1') is not None - assert written_arrays.get('1').shape == (2, 3, 2048, 1024, 6) - assert written_arrays.get('2') is not None - assert written_arrays.get('2').shape == (2, 3, 1024, 512, 6) - assert written_arrays.get('3') is not None - assert written_arrays.get('3').shape == (2, 3, 512, 256, 6) + assert written_arrays.get('1').shape == (2, 3, 512, 256, 3) - -@pytest.mark.parametrize('resample_method', RESAMPLE_METHODS) -def testCropAndDownsample(resample_method, tmp_path): - output_file = tmp_path / f'cropped_{resample_method}.db' + sample_region, _format = written.getRegion( + region=dict(top=252, bottom=260, left=0, right=4), + output=dict(maxWidth=2, maxHeight=4), + format='numpy', + ) + assert sample_region.shape == (4, 2, 3) + white_mask = (sample_region[..., 0] == 255).flatten().tolist() + + if resample_method == ResampleMethod.PIL_NEAREST: + # expect any of the four variations, this will depend on version + expected_masks = [ + [True, False, True, False, True, True, True, False], # upper left + [True, False, True, True, True, False, True, False], # lower left + [True, False, False, True, True, True, False, False], # upper right + [False, False, True, True, False, True, True, False], # lower right + ] + elif resample_method == ResampleMethod.PIL_LANCZOS: + expected_masks = [[False, False, False, False, False, False, False, False]] + elif resample_method == ResampleMethod.PIL_BILINEAR: + expected_masks = [[False, False, False, False, False, False, False, False]] + elif resample_method == ResampleMethod.PIL_BICUBIC: + expected_masks = [[False, False, False, False, False, False, False, False]] + elif resample_method == ResampleMethod.PIL_BOX: + expected_masks = [[False, False, False, False, False, False, False, False]] + elif resample_method == ResampleMethod.PIL_HAMMING: + expected_masks = [[False, False, False, False, False, False, False, False]] + elif resample_method == ResampleMethod.NP_MEAN: + expected_masks = [[False, False, False, False, False, False, False, False]] + elif resample_method == ResampleMethod.NP_MEDIAN: + expected_masks = [[True, False, True, True, True, True, True, False]] + elif resample_method == ResampleMethod.NP_MODE: + expected_masks = [[True, False, True, True, True, True, True, False]] + elif resample_method == ResampleMethod.NP_MAX: + expected_masks = [[True, False, True, True, True, True, True, False]] + elif resample_method == ResampleMethod.NP_MIN: + expected_masks = [[False, False, False, False, False, False, False, False]] + elif resample_method == ResampleMethod.NP_NEAREST: + expected_masks = [[True, False, True, False, True, True, True, False]] + elif resample_method == ResampleMethod.NP_MAX_COLOR: + expected_masks = [[True, False, True, True, True, True, True, False]] + elif resample_method == ResampleMethod.NP_MIN_COLOR: + expected_masks = [[False, False, False, False, False, False, False, False]] + + assert white_mask in expected_masks + + +def testCropAndDownsample(tmp_path): + output_file = tmp_path / 'cropped.db' sink = large_image_source_zarr.new() # add tiles with some overlap to multiple frames From fe4ed29b79e973c4ba270be10aaffc5bbe9f217e Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Thu, 4 Apr 2024 16:54:25 +0000 Subject: [PATCH 23/24] refactor: define expected masks as a dict to pass function complexity check --- test/test_sink.py | 65 ++++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/test/test_sink.py b/test/test_sink.py index b023dc36a..53a1a3607 100644 --- a/test/test_sink.py +++ b/test/test_sink.py @@ -239,42 +239,43 @@ def testImageCopySmallDownsampling(resample_method, tmp_path): assert sample_region.shape == (4, 2, 3) white_mask = (sample_region[..., 0] == 255).flatten().tolist() - if resample_method == ResampleMethod.PIL_NEAREST: - # expect any of the four variations, this will depend on version - expected_masks = [ + expected_masks = { + ResampleMethod.PIL_NEAREST: [ + # expect any of the four variations, this will depend on version [True, False, True, False, True, True, True, False], # upper left [True, False, True, True, True, False, True, False], # lower left [True, False, False, True, True, True, False, False], # upper right [False, False, True, True, False, True, True, False], # lower right - ] - elif resample_method == ResampleMethod.PIL_LANCZOS: - expected_masks = [[False, False, False, False, False, False, False, False]] - elif resample_method == ResampleMethod.PIL_BILINEAR: - expected_masks = [[False, False, False, False, False, False, False, False]] - elif resample_method == ResampleMethod.PIL_BICUBIC: - expected_masks = [[False, False, False, False, False, False, False, False]] - elif resample_method == ResampleMethod.PIL_BOX: - expected_masks = [[False, False, False, False, False, False, False, False]] - elif resample_method == ResampleMethod.PIL_HAMMING: - expected_masks = [[False, False, False, False, False, False, False, False]] - elif resample_method == ResampleMethod.NP_MEAN: - expected_masks = [[False, False, False, False, False, False, False, False]] - elif resample_method == ResampleMethod.NP_MEDIAN: - expected_masks = [[True, False, True, True, True, True, True, False]] - elif resample_method == ResampleMethod.NP_MODE: - expected_masks = [[True, False, True, True, True, True, True, False]] - elif resample_method == ResampleMethod.NP_MAX: - expected_masks = [[True, False, True, True, True, True, True, False]] - elif resample_method == ResampleMethod.NP_MIN: - expected_masks = [[False, False, False, False, False, False, False, False]] - elif resample_method == ResampleMethod.NP_NEAREST: - expected_masks = [[True, False, True, False, True, True, True, False]] - elif resample_method == ResampleMethod.NP_MAX_COLOR: - expected_masks = [[True, False, True, True, True, True, True, False]] - elif resample_method == ResampleMethod.NP_MIN_COLOR: - expected_masks = [[False, False, False, False, False, False, False, False]] - - assert white_mask in expected_masks + ], + ResampleMethod.PIL_LANCZOS: + [[False, False, False, False, False, False, False, False]], + ResampleMethod.PIL_BILINEAR: + [[False, False, False, False, False, False, False, False]], + ResampleMethod.PIL_BICUBIC: + [[False, False, False, False, False, False, False, False]], + ResampleMethod.PIL_BOX: + [[False, False, False, False, False, False, False, False]], + ResampleMethod.PIL_HAMMING: + [[False, False, False, False, False, False, False, False]], + ResampleMethod.NP_MEAN: + [[False, False, False, False, False, False, False, False]], + ResampleMethod.NP_MEDIAN: + [[True, False, True, True, True, True, True, False]], + ResampleMethod.NP_MODE: + [[True, False, True, True, True, True, True, False]], + ResampleMethod.NP_MAX: + [[True, False, True, True, True, True, True, False]], + ResampleMethod.NP_MIN: + [[False, False, False, False, False, False, False, False]], + ResampleMethod.NP_NEAREST: + [[True, False, True, False, True, True, True, False]], + ResampleMethod.NP_MAX_COLOR: + [[True, False, True, True, True, True, True, False]], + ResampleMethod.NP_MIN_COLOR: + [[False, False, False, False, False, False, False, False]], + } + + assert white_mask in expected_masks[resample_method] def testCropAndDownsample(tmp_path): From 2e10ec443da02cb4f454a64867090f71034826cd Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Fri, 5 Apr 2024 19:22:43 +0000 Subject: [PATCH 24/24] test: use odd dimensions in downsampling test --- test/test_sink.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_sink.py b/test/test_sink.py index 53a1a3607..6d31d9687 100644 --- a/test/test_sink.py +++ b/test/test_sink.py @@ -215,8 +215,8 @@ def testImageCopySmallDownsampling(resample_method, tmp_path): fractal=True, tileWidth=128, tileHeight=128, - sizeX=512, - sizeY=1024, + sizeX=511, + sizeY=1023, frames='c=2,z=3', ) copyFromSource(source, sink) @@ -227,7 +227,7 @@ def testImageCopySmallDownsampling(resample_method, tmp_path): written_arrays = dict(written._zarr.arrays()) assert len(written_arrays) == written.levels assert written_arrays.get('0') is not None - assert written_arrays.get('0').shape == (2, 3, 1024, 512, 3) + assert written_arrays.get('0').shape == (2, 3, 1023, 511, 3) assert written_arrays.get('1') is not None assert written_arrays.get('1').shape == (2, 3, 512, 256, 3)