Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Imaris Reader: support for LZ4 compression and performance improvements #4249

Merged
merged 17 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions ant/java.xml
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,30 @@ your FindBugs installation's lib directory. E.g.:
<path refid="runtime.classpath"/>
</unzip>

<!-- Merge ucar.nc2.filter.FilterProvider files -->
<pathconvert property="runtimePaths.string" refid="runtime.classpath" pathsep="${line.separator}"/>
<echo file="${build.dir}/unzip/runtimePaths.txt">${runtimePaths.string}</echo>
<loadfile property="cdmcorePath" srcFile="${build.dir}/unzip/runtimePaths.txt">
<filterchain>
<linecontains>
<contains value="cdm-core"/>
</linecontains>
<striplinebreaks/>
</filterchain>
</loadfile>
<loadfile property="formatsgplPath" srcFile="${build.dir}/unzip/runtimePaths.txt">
<filterchain>
<linecontains>
<contains value="formats-gpl"/>
</linecontains>
<striplinebreaks/>
</filterchain>
</loadfile>
<concat destfile="${build.dir}/unzip/META-INF/services/ucar.nc2.filter.FilterProvider" fixlastline="yes">
<zipentry zipfile="${cdmcorePath}" name="META-INF/services/ucar.nc2.filter.FilterProvider"/>
<zipentry zipfile="${formatsgplPath}" name="META-INF/services/ucar.nc2.filter.FilterProvider"/>
</concat>

<jar destfile="${artifact.dir}/${bundle.jar}" filesetmanifest="skip">
<zipfileset dir="${build.dir}/unzip"/>
<manifest>
Expand Down
5 changes: 5 additions & 0 deletions components/bundles/bioformats_package/assembly.xml
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,9 @@
<useProjectArtifact>false</useProjectArtifact>
</dependencySet>
</dependencySets>
<containerDescriptorHandlers>
<containerDescriptorHandler>
<handlerName>metaInf-services</handlerName>
</containerDescriptorHandler>
</containerDescriptorHandlers>
</assembly>
3 changes: 2 additions & 1 deletion components/formats-gpl/build.properties
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ component.deprecation = true

component.resources-bin = loci/formats/bio-formats-logo.png \
loci/formats/utests/2008-09.ome \
lib/**/*.dll
lib/**/*.dll \
META-INF/services/ucar.nc2.filter.FilterProvider
component.resources-text =

component.main-class = loci.formats.gui.ImageViewer
5 changes: 5 additions & 0 deletions components/formats-gpl/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,11 @@
<artifactId>kryo</artifactId>
<version>${kryo.version}</version>
</dependency>
<dependency>
<groupId>io.airlift</groupId>
<artifactId>aircompressor</artifactId>
<version>0.27</version>
</dependency>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was initially going to suggest removing this and updating ome-codecs instead, but realized we don't have a released version of ome-codecs that includes ome/ome-codecs#39.

In the context of this PR, I think I'm OK with leaving this dependency as-is. Separately, though, and before 8.1.0, we'll need to get ome-codecs released and updated so that we don't have two different versions of aircompressor.


<!-- NB: dependency:analyze has false warning about xml-apis:xml-apis. -->

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
loci.formats.filter.LZ4Filter$LZ4FilterProvider
98 changes: 98 additions & 0 deletions components/formats-gpl/src/loci/formats/filter/LZ4Filter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* #%L
* OME Bio-Formats package for reading and converting biological file formats.
* %%
* Copyright (C) 2005 - 2017 Open Microscopy Environment:
* - Board of Regents of the University of Wisconsin-Madison
* - Glencoe Software, Inc.
* - University of Dundee
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/gpl-2.0.html>.
* #L%
*/

package loci.formats.filter;

import ucar.nc2.filter.FilterProvider;
import ucar.nc2.filter.Filter;

import io.airlift.compress.lz4.Lz4Decompressor;

import java.util.Map;
import java.io.IOException;
import java.nio.ByteBuffer;

public class LZ4Filter extends Filter {

  /** Display name of this filter. */
  static final String name = "LZ4 filter";

  /** HDF5 registered filter ID for LZ4. */
  static final int id = 32004;

  private Lz4Decompressor decompressor;

  public LZ4Filter(Map<String, Object> properties) {
    decompressor = new Lz4Decompressor();
  }

  @Override
  public String getName() {
    return name;
  }

  @Override
  public int getId() {
    return id;
  }

  /** Compression is not implemented; this filter is read-only. */
  @Override
  public byte[] encode(byte[] dataIn) throws IOException {
    throw new UnsupportedOperationException("LZ4Filter does not support data compression");
  }

  /**
   * Decodes a chunk compressed with the registered HDF5 LZ4 filter (ID 32004).
   *
   * Stream layout (big-endian, matching ByteBuffer's default byte order):
   * <pre>
   *   8 bytes: total decompressed size of the chunk
   *   4 bytes: nominal decompressed size of each block
   *   then, per block:
   *     4 bytes: compressed size of the block, followed by the block data
   * </pre>
   * A block whose compressed size equals its decompressed size is stored
   * uncompressed and must be copied verbatim. The last block may be smaller
   * than the nominal block size.
   *
   * @param dataIn the raw filter stream for one chunk
   * @return the fully decompressed chunk
   */
  @Override
  public byte[] decode(byte[] dataIn) throws IOException {
    ByteBuffer byteBuffer = ByteBuffer.wrap(dataIn);

    long totalDecompressedSize = byteBuffer.getLong();
    int decompressedBlockSize = byteBuffer.getInt();

    byte[] decompressed = new byte[Math.toIntExact(totalDecompressedSize)];

    // iterate over every block in the stream; the original single-block
    // read would silently truncate chunks spanning multiple LZ4 blocks
    int offset = 0;
    while (offset < decompressed.length) {
      // the final block may be short
      int blockSize = Math.min(decompressedBlockSize, decompressed.length - offset);
      int compressedBlockSize = byteBuffer.getInt();
      if (compressedBlockSize == blockSize) {
        // block was stored uncompressed
        byteBuffer.get(decompressed, offset, blockSize);
      }
      else {
        byte[] compressedBlock = new byte[compressedBlockSize];
        byteBuffer.get(compressedBlock, 0, compressedBlockSize);
        decompressor.decompress(compressedBlock, 0, compressedBlockSize,
          decompressed, offset, blockSize);
      }
      offset += blockSize;
    }

    return decompressed;
  }

  /** Service-provider hook so netCDF discovers this filter via META-INF/services. */
  public static class LZ4FilterProvider implements FilterProvider {

    @Override
    public String getName() {
      return name;
    }

    @Override
    public int getId() {
      return id;
    }

    @Override
    public Filter create(Map<String, Object> properties) {
      return new LZ4Filter(properties);
    }

  }

}
129 changes: 124 additions & 5 deletions components/formats-gpl/src/loci/formats/in/ImarisHDFReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Hashtable;

import loci.common.DataTools;
import loci.common.Location;
Expand Down Expand Up @@ -71,6 +72,15 @@ public class ImarisHDFReader extends SubResolutionFormatReader {
private List<double[]> colors;
private int lastChannel = 0;

// caching parameters
private long maxBufferSize = 1024 * 1024 * 1024;
private Object[] buffer = null;
private int[] blockSizeZPerResolution;
private int lastMinZ = 0;
private int lastMaxZ = 0;
private int lastRes = 0;
private int lastT = 0;

// -- Constructor --

/** Constructs a new Imaris HDF reader. */
Expand Down Expand Up @@ -223,6 +233,10 @@ public void close(boolean fileOnly) throws IOException {
pixelSizeX = pixelSizeY = pixelSizeZ = 0;
minX = minY = minZ = maxX = maxY = maxZ = 0;

lastRes = lastT = lastMinZ = lastMaxZ = 0;
buffer = null;
blockSizeZPerResolution = null;

if (netcdf != null) netcdf.close();
netcdf = null;

Expand Down Expand Up @@ -300,12 +314,22 @@ protected void initFile(String id) throws FormatException, IOException {
ms0.thumbnail = false;
ms0.dimensionOrder = "XYZCT";

// read block sizes for caching
blockSizeZPerResolution = new int[seriesCount];
for (int res = 0; res < seriesCount; res++) {
String datasetPath = "DataSet/ResolutionLevel_" + res + "/TimePoint_0/Channel_0/Data";
Hashtable<String, Object> table = netcdf.getVariableAttributes(datasetPath);
String chunkSizesString = (String) table.get("_ChunkSizes");
String[] sizes = chunkSizesString.split(" ");
blockSizeZPerResolution[res] = Integer.parseInt(sizes[0]);
}

// determine pixel type - this isn't stored in the metadata, so we need
// to check the pixels themselves

int type = -1;

Object pix = getImageData(0, 0, 0, 1, 1);
Object pix = getSampleData();
if (pix instanceof byte[][]) type = FormatTools.UINT8;
else if (pix instanceof short[][]) type = FormatTools.UINT16;
else if (pix instanceof int[][]) type = FormatTools.UINT32;
Expand Down Expand Up @@ -434,8 +458,7 @@ private Object getImageData(int no, int x, int y, int width, int height)
throws FormatException
{
int[] zct = getZCTCoords(no);
String path = "/DataSet/ResolutionLevel_" + getCoreIndex() + "/TimePoint_" +
zct[2] + "/Channel_" + zct[1] + "/Data";
int resolutionIndex = getCoreIndex();
Object image = null;

// the width and height cannot be 1, because then netCDF will give us a
Expand Down Expand Up @@ -467,14 +490,110 @@ private Object getImageData(int no, int x, int y, int width, int height)
x = (getSizeX() / 2) - 1;
}

int[] dimensions = new int[] {1, height, width};
int[] indices = new int[] {zct[0], y, x};
// cache dataset blocks to avoid multiple reads.
// use caching for 3D datasets and only if the size of the required buffer is < maxBufferSize
if (getSizeZ() > 1 && getSizeX() * getSizeY() * blockSizeZPerResolution[resolutionIndex] * getSizeC() * FormatTools.getBytesPerPixel(getPixelType()) < maxBufferSize) {
// update buffer if needed
if (zct[0] < lastMinZ || zct[0] > lastMaxZ || zct[2] != lastT || resolutionIndex != lastRes || buffer == null) {
buffer = new Object[getSizeC()];
if (resolutionIndex != lastRes) {
lastRes = resolutionIndex;
lastT = zct[2];
}
if (zct[2] != lastT) {
lastT = zct[2];
}

int blockNumber = zct[0] / blockSizeZPerResolution[resolutionIndex];
int[] dims = new int[] {blockSizeZPerResolution[resolutionIndex], getSizeY(), getSizeX()};
int[] idcs = new int[] {blockNumber * blockSizeZPerResolution[resolutionIndex], 0, 0};
try {
String path;
for (int ch = 0; ch < getSizeC(); ch++) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While I understand the rationale for caching an entire block for a given channel given the way the data is stored, my understanding is that different channels are stored separately.

While there are scenarios where preloading all channels makes sense, would this add some unnecessary overhead in some others e.g. when only one channel is requested or in the case of a large number of channels where only a few are rendered?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Different channels are indeed stored separately.

If, when reading an image, planes were requested sequentially from a single channel (therefore moving along z before moving along channels) then the buffer could simply hold a stack of planes from that single channel. However, in my testing I saw that Bio-Formats (or Fiji?) requests the same image plane for all channels before moving to the next plane (moving along channels before moving along z). If the buffer just cached a stack from a single channel, the buffer would need to be updated every time, making it useless.

Let me know if I misunderstood your comment

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The order in which planes are being retrieved including the index of the channels to be retrieved is fundamentally the decision of the caller. In the case of Fiji, I assume a single reader will indeed iterate through all the active channels for the given timepoint/Z-section before moving onto the next ones.

Other applications might make different decisions or even use concurrent requests in which case the benefit of caching all the channels might be limited. We will need to assess the cost/benefit of the caching logic across these scenarios as part of the testing.

path = "/DataSet/ResolutionLevel_" + resolutionIndex + "/TimePoint_" + zct[2] + "/Channel_" + ch + "/Data";
buffer[ch] = netcdf.getArray(path, idcs, dims);
}
}
catch (ServiceException e) {
throw new FormatException(e);
}

lastMinZ = blockNumber * blockSizeZPerResolution[resolutionIndex];
lastMaxZ = lastMinZ + blockSizeZPerResolution[resolutionIndex] - 1;
}

// read from buffer
if (buffer[zct[1]] instanceof byte[][][]) {
byte[][][] byteBuffer = (byte[][][]) buffer[zct[1]];
byte[][] slice = new byte[height][width];
for (int i = y; i < y + height; i++) {
for (int j = x; j < x + width; j++) {
slice[i - y][j - x] = byteBuffer[zct[0] - lastMinZ][i][j];
}
}
image = (Object) slice;
}
else if (buffer[zct[1]] instanceof short[][][]) {
short[][][] shortBuffer = (short[][][]) buffer[zct[1]];
short[][] slice = new short[height][width];
for (int i = y; i < y + height; i++) {
for (int j = x; j < x + width; j++) {
slice[i - y][j - x] = shortBuffer[zct[0] - lastMinZ][i][j];
}
}
image = (Object) slice;
}
else if (buffer[zct[1]] instanceof int[][][]) {
int[][][] intBuffer = (int[][][]) buffer[zct[1]];
int[][] slice = new int[height][width];
for (int i = y; i < y + height; i++) {
for (int j = x; j < x + width; j++) {
slice[i - y][j - x] = intBuffer[zct[0] - lastMinZ][i][j];
}
}
image = (Object) slice;
}
else if (buffer[zct[1]] instanceof float[][][]) {
float[][][] floatBuffer = (float[][][]) buffer[zct[1]];
float[][] slice = new float[height][width];
for (int i = y; i < y + height; i++) {
for (int j = x; j < x + width; j++) {
slice[i - y][j - x] = floatBuffer[zct[0] - lastMinZ][i][j];
}
}
image = (Object) slice;
}
}
else {
int[] dimensions = new int[] {1, height, width};
int[] indices = new int[] {zct[0], y, x};
try {
String path = "/DataSet/ResolutionLevel_" + resolutionIndex + "/TimePoint_" + zct[2] + "/Channel_" + zct[1] + "/Data";
image = netcdf.getArray(path, indices, dimensions);
}
catch (ServiceException e) {
throw new FormatException(e);
}
}

return image;
}

/**
 * Reads a tiny 2x2 sample from the first plane of the first
 * channel/timepoint at the current resolution level, so callers can
 * infer the pixel type from the runtime type of the returned array.
 *
 * @return the primitive array produced by the netCDF service
 * @throws FormatException if the underlying netCDF read fails
 */
private Object getSampleData()
  throws FormatException
{
  final String samplePath = "/DataSet/ResolutionLevel_" + getCoreIndex() +
    "/TimePoint_0/Channel_0/Data";
  try {
    // 1 plane, 2 rows, 2 columns starting at the origin
    return netcdf.getArray(samplePath, new int[] {0, 0, 0}, new int[] {1, 2, 2});
  }
  catch (ServiceException e) {
    throw new FormatException(e);
  }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import ucar.nc2.Attribute;
import ucar.nc2.Group;
import ucar.nc2.NetcdfFile;
import ucar.nc2.NetcdfFiles;
import ucar.nc2.Variable;

import com.esotericsoftware.kryo.Kryo;
Expand Down Expand Up @@ -247,7 +248,7 @@ private void parseAttributesAndVariables(List<Group> groups) {
if (!groupName.endsWith("/")) variableName = "/" + variableName;
variableList.add(variableName);
}
groups = group.getGroups();
groups = (List<Group>) group.getGroups();
parseAttributesAndVariables(groups);
}
}
Expand Down Expand Up @@ -307,7 +308,7 @@ public void print(String s) { }
};
System.setOut(throwaway);
throwaway.close();
netCDFFile = NetcdfFile.open(currentId);
netCDFFile = NetcdfFiles.open(currentId);
System.setOut(outStream);
root = netCDFFile.getRootGroup();
}
Expand Down
Loading