Imaris Reader: support for LZ4 compression and performance improvements #4249
Changes from all commits: 40ceefa, 37cba60, 8941acc, db73caf, 60a1cfe, 2afc620, d15e633, 4760003, a550ffe, 32bcfcc, 2c4dfbd, f0a0711, 58e5dfd, 860b334, fc379cc, 50de137, d742ade
@@ -0,0 +1 @@
loci.formats.filter.LZ4Filter$LZ4FilterProvider
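This one-line file registers the provider with Java's standard ServiceLoader mechanism (presumably a provider-configuration file under META-INF/services named after the ucar.nc2.filter.FilterProvider interface; the diff does not show the path). A minimal sketch of how such providers can be enumerated at runtime; the demo class is illustrative and not part of the PR:

import java.util.ServiceLoader;

import ucar.nc2.filter.FilterProvider;

public class FilterProviderDemo {
  public static void main(String[] args) {
    // ServiceLoader scans provider-configuration files on the classpath and
    // instantiates every FilterProvider implementation listed in them
    for (FilterProvider provider : ServiceLoader.load(FilterProvider.class)) {
      System.out.println(provider.getName() + " (id " + provider.getId() + ")");
    }
  }
}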
@@ -0,0 +1,98 @@
/*
 * #%L
 * OME Bio-Formats package for reading and converting biological file formats.
 * %%
 * Copyright (C) 2005 - 2017 Open Microscopy Environment:
 *   - Board of Regents of the University of Wisconsin-Madison
 *   - Glencoe Software, Inc.
 *   - University of Dundee
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program. If not, see
 * <http://www.gnu.org/licenses/gpl-2.0.html>.
 * #L%
 */

package loci.formats.filter;

import ucar.nc2.filter.FilterProvider;
import ucar.nc2.filter.Filter;

import io.airlift.compress.lz4.Lz4Decompressor;

import java.util.Map;
import java.io.IOException;
import java.nio.ByteBuffer;

public class LZ4Filter extends Filter {

  static final String name = "LZ4 filter";
  static final int id = 32004;
  private Lz4Decompressor decompressor;

  public LZ4Filter(Map<String, Object> properties) {
    decompressor = new Lz4Decompressor();
  }

  @Override
  public String getName() {
    return name;
  }

  @Override
  public int getId() {
    return id;
  }

  @Override
  public byte[] encode(byte[] dataIn) throws IOException {
    throw new UnsupportedOperationException("LZ4Filter does not support data compression");
  }

  @Override
  public byte[] decode(byte[] dataIn) throws IOException {
    ByteBuffer byteBuffer = ByteBuffer.wrap(dataIn);

    // chunk header: total decompressed size (8 bytes), then the decompressed
    // and compressed sizes of the block (4 bytes each)
    long totalDecompressedSize = byteBuffer.getLong();
    int decompressedBlockSize = byteBuffer.getInt();
    int compressedBlockSize = byteBuffer.getInt();

    byte[] decompressedBlock = new byte[Math.toIntExact(totalDecompressedSize)];
    byte[] compressedBlock = new byte[compressedBlockSize];

    byteBuffer.get(compressedBlock, 0, compressedBlockSize);
    decompressor.decompress(compressedBlock, 0, compressedBlockSize, decompressedBlock, 0, decompressedBlockSize);

    return decompressedBlock;
  }

  public static class LZ4FilterProvider implements FilterProvider {

    @Override
    public String getName() {
      return name;
    }

    @Override
    public int getId() {
      return id;
    }

    @Override
    public Filter create(Map<String, Object> properties) {
      return new LZ4Filter(properties);
    }

  }

}
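As a usage illustration of the decode path, here is a hedged round-trip sketch. It compresses a payload with aircompressor's Lz4Compressor, prepends the 16-byte header that decode() reads (total decompressed size as a long, then the decompressed and compressed block sizes as ints, big-endian by ByteBuffer default), and runs the result through the filter. The demo class is hypothetical and assumes a single-block chunk:

import java.nio.ByteBuffer;
import java.util.Arrays;

import io.airlift.compress.lz4.Lz4Compressor;

import loci.formats.filter.LZ4Filter;

public class LZ4FilterRoundTrip {
  public static void main(String[] args) throws Exception {
    byte[] original = new byte[4096];
    Arrays.fill(original, (byte) 42); // highly compressible payload

    // compress with the same codec family the filter decompresses
    Lz4Compressor compressor = new Lz4Compressor();
    byte[] scratch = new byte[compressor.maxCompressedLength(original.length)];
    int compressedSize = compressor.compress(original, 0, original.length, scratch, 0, scratch.length);

    // header: total decompressed size, decompressed block size, compressed block size
    ByteBuffer chunk = ByteBuffer.allocate(16 + compressedSize);
    chunk.putLong(original.length);
    chunk.putInt(original.length);
    chunk.putInt(compressedSize);
    chunk.put(scratch, 0, compressedSize);

    byte[] decoded = new LZ4Filter(null).decode(chunk.array());
    System.out.println("round trip ok: " + Arrays.equals(original, decoded));
  }
}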
@@ -28,6 +28,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.Hashtable;

import loci.common.DataTools;
import loci.common.Location;
@@ -71,6 +72,15 @@ public class ImarisHDFReader extends SubResolutionFormatReader {
  private List<double[]> colors;
  private int lastChannel = 0;

  // caching parameters
  private long maxBufferSize = 1024 * 1024 * 1024;
  private Object[] buffer = null;
  private int[] blockSizeZPerResolution;
  private int lastMinZ = 0;
  private int lastMaxZ = 0;
  private int lastRes = 0;
  private int lastT = 0;

  // -- Constructor --

  /** Constructs a new Imaris HDF reader. */
@@ -223,6 +233,10 @@ public void close(boolean fileOnly) throws IOException {
    pixelSizeX = pixelSizeY = pixelSizeZ = 0;
    minX = minY = minZ = maxX = maxY = maxZ = 0;

    lastRes = lastT = lastMinZ = lastMaxZ = 0;
    buffer = null;
    blockSizeZPerResolution = null;

    if (netcdf != null) netcdf.close();
    netcdf = null;
@@ -300,12 +314,22 @@ protected void initFile(String id) throws FormatException, IOException {
    ms0.thumbnail = false;
    ms0.dimensionOrder = "XYZCT";

    // read block sizes for caching
    blockSizeZPerResolution = new int[seriesCount];
    for (int res = 0; res < seriesCount; res++) {
      String datasetPath = "DataSet/ResolutionLevel_" + res + "/TimePoint_0/Channel_0/Data";
      Hashtable<String, Object> table = netcdf.getVariableAttributes(datasetPath);
      String chunkSizesString = (String) table.get("_ChunkSizes");
      String[] sizes = chunkSizesString.split(" ");
      blockSizeZPerResolution[res] = Integer.parseInt(sizes[0]);
    }

    // determine pixel type - this isn't stored in the metadata, so we need
    // to check the pixels themselves

    int type = -1;

-    Object pix = getImageData(0, 0, 0, 1, 1);
+    Object pix = getSampleData();
    if (pix instanceof byte[][]) type = FormatTools.UINT8;
    else if (pix instanceof short[][]) type = FormatTools.UINT16;
    else if (pix instanceof int[][]) type = FormatTools.UINT32;
@@ -434,8 +458,7 @@ private Object getImageData(int no, int x, int y, int width, int height)
    throws FormatException
  {
    int[] zct = getZCTCoords(no);
-    String path = "/DataSet/ResolutionLevel_" + getCoreIndex() + "/TimePoint_" +
-      zct[2] + "/Channel_" + zct[1] + "/Data";
+    int resolutionIndex = getCoreIndex();
    Object image = null;

    // the width and height cannot be 1, because then netCDF will give us a
@@ -467,14 +490,110 @@ private Object getImageData(int no, int x, int y, int width, int height)
      x = (getSizeX() / 2) - 1;
    }

-    int[] dimensions = new int[] {1, height, width};
-    int[] indices = new int[] {zct[0], y, x};
    // cache dataset blocks to avoid multiple reads.
    // use caching for 3D datasets and only if the size of the required buffer is < maxBufferSize
    if (getSizeZ() > 1 && getSizeX() * getSizeY() * blockSizeZPerResolution[resolutionIndex] * getSizeC() * FormatTools.getBytesPerPixel(getPixelType()) < maxBufferSize) {
      // update buffer if needed
      if (zct[0] < lastMinZ || zct[0] > lastMaxZ || zct[2] != lastT || resolutionIndex != lastRes || buffer == null) {
        buffer = new Object[getSizeC()];
        if (resolutionIndex != lastRes) {
          lastRes = resolutionIndex;
          lastT = zct[2];
        }
        if (zct[2] != lastT) {
          lastT = zct[2];
        }

        int blockNumber = zct[0] / blockSizeZPerResolution[resolutionIndex];
        int[] dims = new int[] {blockSizeZPerResolution[resolutionIndex], getSizeY(), getSizeX()};
        int[] idcs = new int[] {blockNumber * blockSizeZPerResolution[resolutionIndex], 0, 0};
        try {
          String path;
          for (int ch = 0; ch < getSizeC(); ch++) {
            path = "/DataSet/ResolutionLevel_" + resolutionIndex + "/TimePoint_" + zct[2] + "/Channel_" + ch + "/Data";
            buffer[ch] = netcdf.getArray(path, idcs, dims);
          }
        }
        catch (ServiceException e) {
          throw new FormatException(e);
        }

        lastMinZ = blockNumber * blockSizeZPerResolution[resolutionIndex];
        lastMaxZ = lastMinZ + blockSizeZPerResolution[resolutionIndex] - 1;
      }

      // read from buffer
      if (buffer[zct[1]] instanceof byte[][][]) {
        byte[][][] byteBuffer = (byte[][][]) buffer[zct[1]];
        byte[][] slice = new byte[height][width];
        for (int i = y; i < y + height; i++) {
          for (int j = x; j < x + width; j++) {
            slice[i - y][j - x] = byteBuffer[zct[0] - lastMinZ][i][j];
          }
        }
        image = (Object) slice;
      }
      else if (buffer[zct[1]] instanceof short[][][]) {
        short[][][] shortBuffer = (short[][][]) buffer[zct[1]];
        short[][] slice = new short[height][width];
        for (int i = y; i < y + height; i++) {
          for (int j = x; j < x + width; j++) {
            slice[i - y][j - x] = shortBuffer[zct[0] - lastMinZ][i][j];
          }
        }
        image = (Object) slice;
      }
      else if (buffer[zct[1]] instanceof int[][][]) {
        int[][][] intBuffer = (int[][][]) buffer[zct[1]];
        int[][] slice = new int[height][width];
        for (int i = y; i < y + height; i++) {
          for (int j = x; j < x + width; j++) {
            slice[i - y][j - x] = intBuffer[zct[0] - lastMinZ][i][j];
          }
        }
        image = (Object) slice;
      }
      else if (buffer[zct[1]] instanceof float[][][]) {
        float[][][] floatBuffer = (float[][][]) buffer[zct[1]];
        float[][] slice = new float[height][width];
        for (int i = y; i < y + height; i++) {
          for (int j = x; j < x + width; j++) {
            slice[i - y][j - x] = floatBuffer[zct[0] - lastMinZ][i][j];
          }
        }
        image = (Object) slice;
      }
    }
    else {
      int[] dimensions = new int[] {1, height, width};
      int[] indices = new int[] {zct[0], y, x};
      try {
        String path = "/DataSet/ResolutionLevel_" + resolutionIndex + "/TimePoint_" + zct[2] + "/Channel_" + zct[1] + "/Data";
        image = netcdf.getArray(path, indices, dimensions);
      }
      catch (ServiceException e) {
        throw new FormatException(e);
      }
    }

    return image;
  }

  private Object getSampleData()
    throws FormatException
  {
    int resolutionIndex = getCoreIndex();
    Object image = null;
    int[] dimensions = new int[] {1, 2, 2};
    int[] indices = new int[] {0, 0, 0};
    try {
      String path = "/DataSet/ResolutionLevel_" + resolutionIndex + "/TimePoint_0/Channel_0/Data";
      image = netcdf.getArray(path, indices, dimensions);
    }
    catch (ServiceException e) {
      throw new FormatException(e);
    }

    return image;
  }

Review thread on the per-channel preloading loop in getImageData:

Reviewer: While I understand the rationale for caching an entire block for a given channel, given the way the data is stored, my understanding is that different channels are stored separately. While there are scenarios where preloading all channels makes sense, would this add some unnecessary overhead in others, e.g. when only one channel is requested, or in the case of a large number of channels where only a few are rendered?

Author: Different channels are indeed stored separately. If, when reading an image, planes were requested sequentially from a single channel (therefore moving along Z before moving along channels), then the buffer could simply hold a stack of planes from that single channel. However, in my testing I saw that Bio-Formats (or Fiji?) requests the same image plane for all channels before moving to the next plane (moving along channels before moving along Z). If the buffer just cached a stack from a single channel, the buffer would need to be updated every time, making it useless. Let me know if I misunderstood your comment.

Reviewer: The order in which planes are retrieved, including the index of the channels to be retrieved, is fundamentally the decision of the caller. In the case of Fiji, I assume a single reader will indeed iterate through all the active channels for the given timepoint/Z-section before moving onto the next ones. Other applications might make different decisions or even use concurrent requests, in which case the benefit of caching all the channels might be limited. We will need to assess the cost/benefit of the caching logic across these scenarios as part of the testing.
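To make the memory cost discussed in the thread above concrete, here is a worked instance of the caching guard with hypothetical dimensions; the reader preloads only when the product stays under maxBufferSize (1 GiB). The sketch uses long arithmetic throughout to keep the large product from overflowing:

public class CacheGuardDemo {
  public static void main(String[] args) {
    // hypothetical dataset: 1024x1024 planes, Z chunks of 32, 4 channels, 16-bit pixels
    long required = 1024L * 1024 * 32 * 4 * 2;
    long maxBufferSize = 1024L * 1024 * 1024; // 1 GiB, as in the reader
    System.out.println(required + " bytes (" + (required >> 20) + " MiB) needed; caching enabled: "
      + (required < maxBufferSize)); // 268435456 bytes (256 MiB); caching enabled: true
  }
}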
Reviewer comment on the aircompressor dependency:

I was initially going to suggest removing this and updating ome-codecs instead, but realized we don't have a released version of ome-codecs that includes ome/ome-codecs#39. In the context of this PR, I think I'm OK with leaving this dependency as-is. Separately, though, and before 8.1.0, we'll need to get ome-codecs released and updated so that we don't have two different versions of aircompressor.