From a55738d46a06c412209754e215df909e38cfb052 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Wed, 26 Jun 2024 23:16:13 -0400 Subject: [PATCH] Convert H5Z*.jl packages into package extensions --- Project.toml | 15 + ext/BloscExt/BloscExt.jl | 221 ++++++++++ .../H5Zblosc => ext/BloscExt}/LICENSE.txt | 0 ext/CodecBzip2Ext/CodecBzip2Ext.jl | 235 +++++++++++ .../CodecBzip2Ext}/LICENSE.txt | 0 ext/CodecBzip2Ext/README.md | 6 + .../CodecBzip2Ext}/THIRDPARTY.txt | 0 ext/CodecLz4Ext/CodecLz4Ext.jl | 231 +++++++++++ .../H5Zlz4 => ext/CodecLz4Ext}/LICENSE.txt | 0 ext/CodecLz4Ext/README.md | 6 + .../H5Zlz4 => ext/CodecLz4Ext}/THIRDPARTY.txt | 4 +- ext/CodecZstdExt/CodecZstdExt.jl | 128 ++++++ .../H5Zzstd => ext/CodecZstdExt}/LICENSE.txt | 0 ext/CodecZstdExt/Project.toml.old | 12 + ext/CodecZstdExt/README.md | 8 + .../CodecZstdExt}/THIRDPARTY.txt | 0 ext/bitshuffle_jll_ext/LICENSE.txt | 23 ++ ext/bitshuffle_jll_ext/README.md | 6 + ext/bitshuffle_jll_ext/bitshuffle_jll_ext.jl | 384 ++++++++++++++++++ .../H5Zbitshuffle/src/.H5Zbitshuffle.jl.swp | Bin 0 -> 12288 bytes filters/H5Zbitshuffle/src/H5Zbitshuffle.jl | 368 +---------------- filters/H5Zblosc/src/H5Zblosc.jl | 203 +-------- filters/H5Zbzip2/src/H5Zbzip2.jl | 231 +---------- filters/H5Zlz4/src/H5Zlz4.jl | 221 +--------- filters/H5Zzstd/Project.toml | 2 +- filters/H5Zzstd/README.md | 9 +- filters/H5Zzstd/src/H5Zzstd.jl | 134 +----- 27 files changed, 1339 insertions(+), 1108 deletions(-) create mode 100644 ext/BloscExt/BloscExt.jl rename {filters/H5Zblosc => ext/BloscExt}/LICENSE.txt (100%) create mode 100644 ext/CodecBzip2Ext/CodecBzip2Ext.jl rename {filters/H5Zbzip2 => ext/CodecBzip2Ext}/LICENSE.txt (100%) create mode 100644 ext/CodecBzip2Ext/README.md rename {filters/H5Zbzip2 => ext/CodecBzip2Ext}/THIRDPARTY.txt (100%) create mode 100644 ext/CodecLz4Ext/CodecLz4Ext.jl rename {filters/H5Zlz4 => ext/CodecLz4Ext}/LICENSE.txt (100%) create mode 100644 ext/CodecLz4Ext/README.md rename {filters/H5Zlz4 => 
ext/CodecLz4Ext}/THIRDPARTY.txt (95%) create mode 100644 ext/CodecZstdExt/CodecZstdExt.jl rename {filters/H5Zzstd => ext/CodecZstdExt}/LICENSE.txt (100%) create mode 100644 ext/CodecZstdExt/Project.toml.old create mode 100644 ext/CodecZstdExt/README.md rename {filters/H5Zzstd => ext/CodecZstdExt}/THIRDPARTY.txt (100%) create mode 100644 ext/bitshuffle_jll_ext/LICENSE.txt create mode 100644 ext/bitshuffle_jll_ext/README.md create mode 100644 ext/bitshuffle_jll_ext/bitshuffle_jll_ext.jl create mode 100644 filters/H5Zbitshuffle/src/.H5Zbitshuffle.jl.swp diff --git a/Project.toml b/Project.toml index d27dbd6b4..7931b1c9e 100644 --- a/Project.toml +++ b/Project.toml @@ -16,12 +16,27 @@ MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" [weakdeps] MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" +bitshuffle_jll = "228fe19c-1b83-5282-a626-13744502a320" +Blosc = "a74b3585-a348-5f62-a45c-50e91977d574" +CodecBzip2 = "523fee87-0ab8-5b00-afb7-3ecf72e48cfd" +CodecLz4 = "5ba52731-8f18-5e0d-9241-30f10d1ec561" +CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2" [extensions] MPIExt = "MPI" +CodecBzip2Ext = "CodecBzip2" +CodecLz4Ext = "CodecLz4" +CodecZstdExt = "CodecZstd" +bitshuffle_jll_ext = "bitshuffle_jll" +BloscExt = "Blosc" [compat] +bitshuffle_jll = "0.4.2, 0.5" +Blosc = "0.7.3" Compat = "3.1.0, 4" +CodecBzip2 = "0.7, 0.8" +CodecLz4 = "0.4" +CodecZstd = "0.7, 0.8" HDF5_jll = "~1.10.5, ~1.12.0, ~1.14.0" MPI = "0.20" MPIPreferences = "0.1.7" diff --git a/ext/BloscExt/BloscExt.jl b/ext/BloscExt/BloscExt.jl new file mode 100644 index 000000000..26901247e --- /dev/null +++ b/ext/BloscExt/BloscExt.jl @@ -0,0 +1,221 @@ +module BloscExt +# port of https://github.com/Blosc/c-blosc/blob/3a668dcc9f61ad22b5c0a0ab45fe8dad387277fd/hdf5/blosc_filter.c (copyright 2010 Francesc Alted, license: MIT/expat) + +import Blosc +using HDF5.API +import HDF5.Filters: Filter, FilterPipeline +import HDF5.Filters: + filterid, + register_filter, + filtername, + filter_func, + filter_cfunc, + 
set_local_func, + set_local_cfunc +import HDF5.Filters.Shuffle + +export H5Z_FILTER_BLOSC, blosc_filter, BloscFilter + +# Import Blosc shuffle constants +import Blosc: NOSHUFFLE, SHUFFLE, BITSHUFFLE + +const H5Z_FILTER_BLOSC = API.H5Z_filter_t(32001) # Filter ID registered with the HDF Group for Blosc +const FILTER_BLOSC_VERSION = 2 +const blosc_name = "blosc" + +function blosc_set_local(dcpl::API.hid_t, htype::API.hid_t, space::API.hid_t) + blosc_flags = Ref{Cuint}() + blosc_values = Vector{Cuint}(undef, 8) + blosc_nelements = Ref{Csize_t}(length(blosc_values)) + blosc_chunkdims = Vector{API.hsize_t}(undef, 32) + + API.h5p_get_filter_by_id( + dcpl, + H5Z_FILTER_BLOSC, + blosc_flags, + blosc_nelements, + blosc_values, + 0, + C_NULL, + C_NULL + ) + flags = blosc_flags[] + + nelements = max(blosc_nelements[], 4) # First 4 slots reserved + + # Set Blosc info in first two slots + blosc_values[1] = FILTER_BLOSC_VERSION + blosc_values[2] = Blosc.VERSION_FORMAT + + ndims = API.h5p_get_chunk(dcpl, 32, blosc_chunkdims) + chunksize = prod(resize!(blosc_chunkdims, ndims)) + if ndims < 0 || ndims > 32 || chunksize > Blosc.MAX_BUFFERSIZE + return API.herr_t(-1) + end + + htypesize = API.h5t_get_size(htype) + if API.h5t_get_class(htype) == API.H5T_ARRAY + hsuper = API.h5t_get_super(htype) + basetypesize = API.h5t_get_size(hsuper) + API.h5t_close(hsuper) + else + basetypesize = htypesize + end + + # Limit large typesizes (they are pretty inefficient to shuffle + # and, in addition, Blosc does not handle typesizes larger than + # blocksizes). 
+ if basetypesize > Blosc.MAX_TYPESIZE + basetypesize = 1 + end + blosc_values[3] = basetypesize + blosc_values[4] = chunksize * htypesize # size of the chunk + + API.h5p_modify_filter(dcpl, H5Z_FILTER_BLOSC, flags, nelements, blosc_values) + + return API.herr_t(1) +end + +function blosc_filter( + flags::Cuint, + cd_nelmts::Csize_t, + cd_values::Ptr{Cuint}, + nbytes::Csize_t, + buf_size::Ptr{Csize_t}, + buf::Ptr{Ptr{Cvoid}} +) + typesize = unsafe_load(cd_values, 3) # The datatype size + outbuf_size = unsafe_load(cd_values, 4) + # Compression level: + clevel = cd_nelmts >= 5 ? unsafe_load(cd_values, 5) : Cuint(5) + # Do shuffle: + doshuffle = cd_nelmts >= 6 ? unsafe_load(cd_values, 6) : SHUFFLE + + if (flags & API.H5Z_FLAG_REVERSE) == 0 # compressing + # Allocate an output buffer exactly as long as the input data; if + # the result is larger, we simply return 0. The filter is flagged + # as optional, so HDF5 marks the chunk as uncompressed and proceeds. + outbuf_size = unsafe_load(buf_size) + outbuf = Libc.malloc(outbuf_size) + outbuf == C_NULL && return Csize_t(0) + + compname = if cd_nelmts >= 7 + compcode = unsafe_load(cd_values, 7) + Blosc.compname(compcode) + else + "blosclz" + end + Blosc.set_compressor(compname) + status = Blosc.blosc_compress( + clevel, doshuffle, typesize, nbytes, unsafe_load(buf), outbuf, nbytes + ) + status < 0 && (Libc.free(outbuf); return Csize_t(0)) + else # decompressing + # Extract the exact outbuf_size from the buffer header. + # + # NOTE: the guess value got from "cd_values" corresponds to the + # uncompressed chunk size but it should not be used in a general + # cases since other filters in the pipeline can modify the buffer + # size. 
+ in = unsafe_load(buf) + # See https://github.com/JuliaLang/julia/issues/43402 + # Resolved in https://github.com/JuliaLang/julia/pull/43408 + outbuf_size, cbytes, blocksize = Blosc.cbuffer_sizes(in) + outbuf = Libc.malloc(outbuf_size) + outbuf == C_NULL && return Csize_t(0) + status = Blosc.blosc_decompress(in, outbuf, outbuf_size) + status <= 0 && (Libc.free(outbuf); return Csize_t(0)) + end + + if status != 0 + Libc.free(unsafe_load(buf)) + unsafe_store!(buf, outbuf) + unsafe_store!(buf_size, outbuf_size) + return Csize_t(status) # size of compressed/decompressed data + end + Libc.free(outbuf) + return Csize_t(0) +end + +""" + BloscFilter(;level=5, shuffle=true, compressor="blosclz") + +The Blosc compression filter, using [Blosc.jl](https://github.com/JuliaIO/Blosc.jl). Options: + + - `level`: compression level + - `shuffle`: whether to shuffle data before compressing (this option should be used instead of the [`Shuffle`](@ref) filter) + - `compressor`: the compression algorithm. Call `Blosc.compressors()` for the available compressors. 
+ +# External links +* [What Is Blosc?](https://www.blosc.org/pages/blosc-in-depth/) +* [Blosc HDF5 Filter ID 32001](https://portal.hdfgroup.org/display/support/Filters#Filters-32001) +* [Blosc HDF5 Plugin Repository (C code)](https://github.com/Blosc/hdf5-blosc) +""" +struct BloscFilter <: Filter + blosc_version::Cuint + version_format::Cuint + typesize::Cuint + bufsize::Cuint + level::Cuint + shuffle::Cuint + compcode::Cuint +end + +function BloscFilter(; level=5, shuffle=SHUFFLE, compressor="blosclz") + Blosc.isvalidshuffle(shuffle) || throw(ArgumentError("invalid blosc shuffle $shuffle")) + compcode = Blosc.compcode(compressor) + BloscFilter(0, 0, 0, 0, level, shuffle, compcode) +end + +filterid(::Type{BloscFilter}) = H5Z_FILTER_BLOSC +filtername(::Type{BloscFilter}) = blosc_name +set_local_func(::Type{BloscFilter}) = blosc_set_local +set_local_cfunc(::Type{BloscFilter}) = + @cfunction(blosc_set_local, API.herr_t, (API.hid_t, API.hid_t, API.hid_t)) +filter_func(::Type{BloscFilter}) = blosc_filter +filter_cfunc(::Type{BloscFilter}) = @cfunction( + blosc_filter, + Csize_t, + (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}}) +) + +function Base.show(io::IO, blosc::BloscFilter) + print( + io, + BloscFilter, + "(level=", + Int(blosc.level), + ",shuffle=", + blosc.shuffle == NOSHUFFLE ? "NOSHUFFLE" : + blosc.shuffle == SHUFFLE ? "SHUFFLE" : + blosc.shuffle == BITSHUFFLE ? 
"BITSHUFFLE" : + "UNKNOWN", + ",compressor=", + Blosc.compname(blosc.compcode), + ")" + ) +end + +function Base.push!(f::FilterPipeline, blosc::BloscFilter) + 0 <= blosc.level <= 9 || + throw(ArgumentError("blosc compression $(blosc.level) not in [0,9]")) + Blosc.isvalidshuffle(blosc.shuffle) || + throw(ArgumentError("invalid blosc shuffle $(blosc.shuffle)")) + ref = Ref(blosc) + GC.@preserve ref begin + API.h5p_set_filter( + f.plist, + filterid(BloscFilter), + API.H5Z_FLAG_OPTIONAL, + div(sizeof(BloscFilter), sizeof(Cuint)), + pointer_from_objref(ref) + ) + end + return f +end + +function __init__() + register_filter(BloscFilter) +end + +end # module BloscExt diff --git a/filters/H5Zblosc/LICENSE.txt b/ext/BloscExt/LICENSE.txt similarity index 100% rename from filters/H5Zblosc/LICENSE.txt rename to ext/BloscExt/LICENSE.txt diff --git a/ext/CodecBzip2Ext/CodecBzip2Ext.jl b/ext/CodecBzip2Ext/CodecBzip2Ext.jl new file mode 100644 index 000000000..d96662976 --- /dev/null +++ b/ext/CodecBzip2Ext/CodecBzip2Ext.jl @@ -0,0 +1,235 @@ +#= +The code below has been ported to Julia from the original C source: +https://github.com/nexusformat/HDF5-External-Filter-Plugins/blob/master/BZIP2/src/H5Zbzip2.c +The filter function H5Z_filter_bzip2 was adopted from: +PyTables http://www.pytables.org. +The plugin can be used with the HDF5 library version 1.8.11+ to read HDF5 datasets compressed with bzip2 created by PyTables. +License: licenses/H5Zbzip2_LICENSE.txt + +The following license applies to the Julia port. +Copyright (c) 2021 Mark Kittisopikul and Howard Hughes Medical Institute. 
License MIT, see LICENSE.txt
=#
module CodecBzip2Ext

using CodecBzip2
import CodecBzip2: libbzip2
using HDF5.API
import HDF5.Filters:
    Filter, filterid, register_filter, filtername, filter_func, filter_cfunc

export H5Z_FILTER_BZIP2, H5Z_filter_bzip2, Bzip2Filter

const H5Z_FILTER_BZIP2 = API.H5Z_filter_t(307)
const bzip2_name = "HDF5 bzip2 filter; see http://www.hdfgroup.org/services/contributions.html"

"""
    H5Z_filter_bzip2(flags, cd_nelmts, cd_values, nbytes, buf_size, buf) -> Csize_t

Filter callback that compresses or decompresses one buffer with bzip2.

# Arguments
- `flags`: filter flags; when `API.H5Z_FLAG_REVERSE` is set the buffer is
  decompressed, otherwise it is compressed.
- `cd_nelmts`: number of entries available in `cd_values`.
- `cd_values`: when `cd_nelmts > 0`, the first entry is the bzip2 block size in
  units of 100k and must lie in `1:9`. The default is 9.
- `nbytes`: number of valid input bytes in `unsafe_load(buf)`.
- `buf_size`: on success, receives the allocated size of the new buffer.
- `buf`: on success, the old buffer is freed with `Libc.free` and replaced by a
  freshly `Libc.malloc`ed output buffer.

# Returns
The number of bytes written to the new buffer. On any failure the result is `0`
and neither `buf` nor `buf_size` is modified. Errors are logged, never rethrown.
"""
function H5Z_filter_bzip2(
    flags::Cuint,
    cd_nelmts::Csize_t,
    cd_values::Ptr{Cuint},
    nbytes::Csize_t,
    buf_size::Ptr{Csize_t},
    buf::Ptr{Ptr{Cvoid}}
)::Csize_t
    # `outbuf` stays non-NULL only while this function owns the allocation, so
    # the `catch` branch can tell whether it still has something to free.
    outbuf = C_NULL
    outdatalen = Cuint(0)

    try
        if flags & API.H5Z_FLAG_REVERSE != 0
            # Decompress

            # The decompressed size is not known in advance: start from a guess
            # and double the buffer whenever bzip2 runs out of output space.
            outbuflen = nbytes * 3 + 1
            outbuf = Libc.malloc(outbuflen)
            if outbuf == C_NULL
                error("H5Zbzip2: memory allocation failed for bzip2 decompression.")
            end

            stream = CodecBzip2.BZStream()
            # Just use default malloc and free
            stream.bzalloc = C_NULL
            stream.bzfree = C_NULL
            # BZ2_bzDecompressInit
            ret = CodecBzip2.decompress_init!(stream, 0, false)
            if ret != CodecBzip2.BZ_OK
                error("H5Zbzip2: bzip2 decompress start failed with error $ret.")
            end

            # From here on the stream is initialised, so it must be released
            # with BZ2_bzDecompressEnd even if decompression or realloc fails.
            endret = ret
            try
                stream.next_out = outbuf
                stream.avail_out = outbuflen
                stream.next_in = unsafe_load(buf)
                stream.avail_in = nbytes

                cont = true
                while cont
                    # BZ2_bzDecompress
                    ret = CodecBzip2.decompress!(stream)
                    if ret < 0
                        error("H5Zbzip2: bzip2 decompression failed with error $ret.")
                    end
                    cont = ret != CodecBzip2.BZ_STREAM_END
                    if cont && stream.avail_out == 0
                        # Grow the output buffer
                        newbuflen = outbuflen * 2
                        newbuf = Libc.realloc(outbuf, newbuflen)
                        if newbuf == C_NULL
                            # `outbuf` is still valid here and is freed in `catch`.
                            error(
                                "H5Zbzip2: memory allocation failed for bzip2 decompression."
                            )
                        end
                        # Continue writing right after the bytes produced so far.
                        stream.next_out = newbuf + outbuflen
                        stream.avail_out = outbuflen
                        outbuf = newbuf
                        outbuflen = newbuflen
                    end
                end

                outdatalen = stream.total_out_lo32
            finally
                # BZ2_bzDecompressEnd
                endret = CodecBzip2.decompress_end!(stream)
            end
            if endret != CodecBzip2.BZ_OK
                error("H5Zbzip2: bzip2 decompression end failed with error $endret.")
            end
        else
            # Compress data

            blockSize100k = Cuint(9)

            # Get compression blocksize if present
            if cd_nelmts > 0
                blockSize100k = unsafe_load(cd_values)
                if blockSize100k < 1 || blockSize100k > 9
                    error("H5Zbzip2: Invalid compression blocksize: $blockSize100k")
                end
            end

            # Prepare the output buffer
            outbuflen = nbytes + nbytes ÷ 100 + 600 # worst case (bzip2 docs)
            outbuf = Libc.malloc(outbuflen)
            @debug "Allocated" outbuflen outbuf
            if outbuf == C_NULL
                error("H5Zbzip2: Memory allocation failed for bzip2 compression")
            end

            # Compress data; bzip2 reads the capacity from `r_odatalen` and
            # overwrites it with the number of bytes actually produced.
            r_odatalen = Ref{Cuint}(outbuflen)
            ret = BZ2_bzBuffToBuffCompress(
                outbuf, r_odatalen, unsafe_load(buf), nbytes, blockSize100k, 0, 0
            )
            outdatalen = r_odatalen[]
            if ret != CodecBzip2.BZ_OK
                error("H5Zbzip2: bzip2 compression failed with error $ret.")
            end
        end # if flags & API.H5Z_FLAG_REVERSE != 0

        # Success: hand the new buffer over to the caller and drop our ownership.
        Libc.free(unsafe_load(buf))
        unsafe_store!(buf, outbuf)
        unsafe_store!(buf_size, outbuflen)
        outbuf = C_NULL

    catch err
        # "In the case of failure, the return value is 0 (zero) and all pointer arguments are left unchanged."
        outdatalen = Cuint(0)
        if outbuf != C_NULL
            Libc.free(outbuf)
        end
        @error "H5Zbzip2.jl Non-Fatal ERROR: " exception = (err, catch_backtrace())
    end # try - catch

    return Csize_t(outdatalen)
end # function H5Z_filter_bzip2

# Need stdcall for 32-bit Windows?
# Thin wrappers around libbzip2's one-shot buffer-to-buffer entry points.
#
# The exported symbol name and calling convention depend on `CodecBzip2.WIN32`
# (presumably 32-bit Windows, where the symbols carry stdcall decoration -- confirm
# against CodecBzip2). The choice is made once, at definition time, so each
# wrapper body is a single `ccall`.
@static if CodecBzip2.WIN32
    # Compress `sourceLen` bytes from `source` into `dest`. `destLen` points to
    # the capacity of `dest` on entry and to the compressed size on return.
    function BZ2_bzBuffToBuffCompress(
        dest, destLen, source, sourceLen, blockSize100k, verbosity, workFactor
    )
        return ccall(
            ("BZ2_bzBuffToBuffCompress@28", libbzip2),
            stdcall,
            Cint,
            (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint, Cint),
            dest,
            destLen,
            source,
            sourceLen,
            blockSize100k,
            verbosity,
            workFactor
        )
    end

    # Decompress `sourceLen` bytes from `source` into `dest`. `destLen` points to
    # the capacity of `dest` on entry and to the decompressed size on return.
    function BZ2_bzBuffToBuffDecompress(
        dest, destLen, source, sourceLen, small, verbosity
    )
        return ccall(
            ("BZ2_bzBuffToBuffDecompress@24", libbzip2),
            stdcall,
            Cint,
            (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint),
            dest,
            destLen,
            source,
            sourceLen,
            small,
            verbosity
        )
    end
else
    # Compress `sourceLen` bytes from `source` into `dest`. `destLen` points to
    # the capacity of `dest` on entry and to the compressed size on return.
    function BZ2_bzBuffToBuffCompress(
        dest, destLen, source, sourceLen, blockSize100k, verbosity, workFactor
    )
        return ccall(
            (:BZ2_bzBuffToBuffCompress, libbzip2),
            Cint,
            (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint, Cint),
            dest,
            destLen,
            source,
            sourceLen,
            blockSize100k,
            verbosity,
            workFactor
        )
    end

    # Decompress `sourceLen` bytes from `source` into `dest`. `destLen` points to
    # the capacity of `dest` on entry and to the decompressed size on return.
    function BZ2_bzBuffToBuffDecompress(
        dest, destLen, source, sourceLen, small, verbosity
    )
        return ccall(
            (:BZ2_bzBuffToBuffDecompress, libbzip2),
            Cint,
            (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint),
            dest,
            destLen,
            source,
            sourceLen,
            small,
            verbosity
        )
    end
end

# Filters Module

"""
    Bzip2Filter(blockSize100k)

Apply Bzip2 compression. The filter id is $H5Z_FILTER_BZIP2.
+ +# External Links +* [BZIP2 HDF5 Filter ID 307](https://portal.hdfgroup.org/display/support/Filters#Filters-307) +* [PyTables Repository (C code)](https://github.com/PyTables/PyTables) +""" +struct Bzip2Filter <: Filter + blockSize100k::Cuint +end +Bzip2Filter() = Bzip2Filter(9) + +filterid(::Type{Bzip2Filter}) = H5Z_FILTER_BZIP2 +filtername(::Type{Bzip2Filter}) = bzip2_name +filter_func(::Type{Bzip2Filter}) = H5Z_filter_bzip2 +filter_cfunc(::Type{Bzip2Filter}) = @cfunction( + H5Z_filter_bzip2, + Csize_t, + (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}}) +) + +function __init__() + register_filter(Bzip2Filter) +end + +end # module CodecBzip2Ext diff --git a/filters/H5Zbzip2/LICENSE.txt b/ext/CodecBzip2Ext/LICENSE.txt similarity index 100% rename from filters/H5Zbzip2/LICENSE.txt rename to ext/CodecBzip2Ext/LICENSE.txt diff --git a/ext/CodecBzip2Ext/README.md b/ext/CodecBzip2Ext/README.md new file mode 100644 index 000000000..3d767db59 --- /dev/null +++ b/ext/CodecBzip2Ext/README.md @@ -0,0 +1,6 @@ +# H5Zbzip2.jl + +Implements the Bzip2 filter for [HDF5.jl](https://github.com/JuliaIO/HDF5.jl) in Julia. +See the [documentation](https://juliaio.github.io/HDF5.jl/stable/filters/#H5Zbzip2.jl) + +This implements [HDF5 registered filter id 307](https://portal.hdfgroup.org/display/support/Filters#Filters-307). 
\ No newline at end of file diff --git a/filters/H5Zbzip2/THIRDPARTY.txt b/ext/CodecBzip2Ext/THIRDPARTY.txt similarity index 100% rename from filters/H5Zbzip2/THIRDPARTY.txt rename to ext/CodecBzip2Ext/THIRDPARTY.txt diff --git a/ext/CodecLz4Ext/CodecLz4Ext.jl b/ext/CodecLz4Ext/CodecLz4Ext.jl new file mode 100644 index 000000000..9fc1a0240 --- /dev/null +++ b/ext/CodecLz4Ext/CodecLz4Ext.jl @@ -0,0 +1,231 @@ +#= +This is a port of H5Zlz4.c to Julia +https://github.com/HDFGroup/hdf5_plugins/blob/master/LZ4/src/H5Zlz4.c +https://github.com/nexusformat/HDF5-External-Filter-Plugins/blob/master/LZ4/src/H5Zlz4.c +https://github.com/silx-kit/hdf5plugin/blob/main/src/LZ4/H5Zlz4.c + +H5Zlz4 is originally a copyright of HDF Group. License: licenses/H5Zlz4_LICENSE.txt + +The following license applies to the Julia port. +Copyright (c) 2021 Mark Kittisopikul and Howard Hughes Medical Institute. License MIT, see LICENSE.txt +=# +module CodecLz4Ext + +using CodecLz4 +using HDF5.API +import HDF5.Filters: + Filter, filterid, register_filter, filtername, filter_func, filter_cfunc + +export H5Z_FILTER_LZ4, H5Z_filter_lz4, Lz4Filter + +const H5Z_FILTER_LZ4 = API.H5Z_filter_t(32004) + +const DEFAULT_BLOCK_SIZE = 1 << 30 +const lz4_name = "HDF5 lz4 filter; see http://www.hdfgroup.org/services/contributions.html" + +const LZ4_AGGRESSION = Ref(1) + +# flags H5Z_FLAG_REVERSE or H5Z_FLAG_OPTIONAL +# cd_nelmts number of elements in cd_values (0 or 1) +# cd_values the first optional element must be the blockSize +# nbytes - number of valid bytes of data +# buf_size - total size of buffer +# buf - pointer to pointer of data +function H5Z_filter_lz4( + flags::Cuint, + cd_nelmts::Csize_t, + cd_values::Ptr{Cuint}, + nbytes::Csize_t, + buf_size::Ptr{Csize_t}, + buf::Ptr{Ptr{Cvoid}} +)::Csize_t + outBuf = C_NULL + ret_value = Csize_t(0) + + try + if (flags & API.H5Z_FLAG_REVERSE) != 0 # reverse filter, decompressing + #i32Buf = Ref{UInt32}() + blockSize = UInt32(0) + roBuf = Ref{UInt8}() + rpos = 
Ptr{UInt8}(unsafe_load(buf)) + #i64Buf = Ptr{UInt64}(rpos) + # Load the first 8 bytes from buffer as a big endian UInt64 + # This is the original size of the buffer + origSize = ntoh(unsafe_load(Ptr{UInt64}(rpos))) + rpos += 8 # advance the pointer + + # Next read the next four bytes from the buffer as a big endian UInt32 + # This is the blocksize + #i32Buf[] = rpos + blockSize = ntoh(unsafe_load(Ptr{UInt32}(rpos))) + rpos += 4 + if blockSize > origSize + blockSize = origSize + end + + # malloc a byte buffer of origSize + # outBuf = Vector{UInt8}(undef, origSize) + @debug "OrigSize" origSize + outBuf = Libc.malloc(origSize) + # Julia should throw an error if it cannot allocate this + roBuf = Ptr{UInt8}(outBuf) + decompSize = 0 + # Start with the first blockSize + while decompSize < origSize + # compressedBlockSize = UInt32(0) + if origSize - decompSize < blockSize # the last block can be smaller than block size + blockSize = origSize - decompSize + end + + #i32Buf[] = rpos + compressedBlockSize = ntoh(unsafe_load(Ptr{UInt32}(rpos))) + rpos += 4 + + if compressedBlockSize == blockSize + # There was no compression + # memcpy(roBuf, rpos, blockSize) + unsafe_copyto!(roBuf, rpos, blockSize) + decompressedBytes = blockSize + else + # do the compression + # LZ4_decompress_fast, version number 10300 ? 
+ @debug "decompress_safe" rpos roBuf compressedBlockSize ( + origSize - decompSize + ) + decompressedBytes = CodecLz4.LZ4_decompress_safe( + rpos, roBuf, compressedBlockSize, origSize - decompSize + ) + @debug "decompressedBytes" decompressedBytes + end + + rpos += compressedBlockSize + roBuf += blockSize + decompSize += decompressedBytes + end + Libc.free(unsafe_load(buf)) + unsafe_store!(buf, outBuf) + outBuf = C_NULL + ret_value = Csize_t(origSize) + else + # forward filter + # compressing + #i64Buf = Ref{UInt64}() + #i32Buf = Ref{UInt32}() + + if nbytes > typemax(Int32) + error("Can only compress chunks up to 2GB") + end + blockSize = unsafe_load(cd_values) + if cd_nelmts > 0 && blockSize > 0 + else + blockSize = DEFAULT_BLOCK_SIZE + end + if blockSize > nbytes + blockSize = nbytes + end + nBlocks = (nbytes - 1) ÷ blockSize + 1 + maxDestSize = + nBlocks * CodecLz4.LZ4_compressBound(blockSize) + 4 + 8 + nBlocks * 4 + outBuf = Libc.malloc(maxDestSize) + + rpos = Ptr{UInt8}(unsafe_load(buf)) + roBuf = Ptr{UInt8}(outBuf) + + # Header + unsafe_store!(Ptr{UInt64}(roBuf), hton(UInt64(nbytes))) + roBuf += 8 + + unsafe_store!(Ptr{UInt32}(roBuf), hton(UInt32(blockSize))) + roBuf += 4 + + outSize = 12 + + for block in 0:(nBlocks - 1) + # compBlockSize::UInt32 + origWritten = Csize_t(block * blockSize) + if nbytes - origWritten < blockSize # the last block may be < blockSize + blockSize = nbytes - origWritten + end + + # aggression = 1 is the same LZ4_compress_default + @debug "LZ4_compress_fast args" rpos outBuf roBuf roBuf + 4 blockSize nBlocks CodecLz4.LZ4_compressBound( + blockSize + ) + compBlockSize = UInt32( + CodecLz4.LZ4_compress_fast( + rpos, + roBuf + 4, + blockSize, + CodecLz4.LZ4_compressBound(blockSize), + LZ4_AGGRESSION[] + ) + ) + @debug "Compressed block size" compBlockSize + + if compBlockSize == 0 + error("Could not compress block $block") + end + + if compBlockSize >= blockSize # compression did not save any space, do a memcpy instead + compBlockSize = 
blockSize + unsafe_copyto!(roBuf + 4, rpos, blockSize) + end + + unsafe_store!(Ptr{UInt32}(roBuf), hton(UInt32(compBlockSize))) # write blocksize + roBuf += 4 + + rpos += blockSize + roBuf += compBlockSize + outSize += compBlockSize + 4 + end + + Libc.free(unsafe_load(buf)) + unsafe_store!(buf, outBuf) + unsafe_store!(buf_size, outSize) + outBuf = C_NULL + ret_value = Csize_t(outSize) + end # (flags & API.H5Z_FLAG_REVERSE) != 0 + + catch err + # "In the case of failure, the return value is 0 (zero) and all pointer arguments are left unchanged." + ret_value = Csize_t(0) + @error "H5Zlz4.jl Non-Fatal ERROR: " err + display(stacktrace(catch_backtrace())) + finally + if outBuf != C_NULL + Libc.free(outBuf) + end + end + return Csize_t(ret_value) +end + +# Filters Module + +""" + Lz4Filter(blockSize) + +Apply LZ4 compression. `blockSize` is the main argument. The filter id is $H5Z_FILTER_LZ4. + +# External Links +* [LZ4 HDF5 Filter ID 32004](https://portal.hdfgroup.org/display/support/Filters#Filters-32004) +* [LZ4 HDF5 Plugin Repository (C code)](https://github.com/nexusformat/HDF5-External-Filter-Plugins/tree/master/LZ4) +""" +struct Lz4Filter <: Filter + blockSize::Cuint +end +Lz4Filter() = Lz4Filter(DEFAULT_BLOCK_SIZE) + +filterid(::Type{Lz4Filter}) = H5Z_FILTER_LZ4 +filtername(::Type{Lz4Filter}) = lz4_name +filter_func(::Type{Lz4Filter}) = H5Z_filter_lz4 +filter_cfunc(::Type{Lz4Filter}) = @cfunction( + H5Z_filter_lz4, + Csize_t, + (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}}) +) + +function __init__() + register_filter(Lz4Filter) +end + +end # module CodecLz4Ext diff --git a/filters/H5Zlz4/LICENSE.txt b/ext/CodecLz4Ext/LICENSE.txt similarity index 100% rename from filters/H5Zlz4/LICENSE.txt rename to ext/CodecLz4Ext/LICENSE.txt diff --git a/ext/CodecLz4Ext/README.md b/ext/CodecLz4Ext/README.md new file mode 100644 index 000000000..e484d68bc --- /dev/null +++ b/ext/CodecLz4Ext/README.md @@ -0,0 +1,6 @@ +# CodecLz4Ext + +Implements the LZ4 
filter for [HDF5.jl](https://github.com/JuliaIO/HDF5.jl) in Julia. +See the [documentation](https://juliaio.github.io/HDF5.jl/stable/filters/#H5Zlz4.jl) + +This implements [HDF5 registered filter id 32004](https://portal.hdfgroup.org/display/support/Filters#Filters-32004). diff --git a/filters/H5Zlz4/THIRDPARTY.txt b/ext/CodecLz4Ext/THIRDPARTY.txt similarity index 95% rename from filters/H5Zlz4/THIRDPARTY.txt rename to ext/CodecLz4Ext/THIRDPARTY.txt index 745176c31..00acf0a3c 100644 --- a/filters/H5Zlz4/THIRDPARTY.txt +++ b/ext/CodecLz4Ext/THIRDPARTY.txt @@ -1,4 +1,4 @@ -H5Zlz4.jl is derived from H5Zlz4.c from HDF5 Group +CodecLz4Ext.jl is derived from H5Zlz4.c from HDF5 Group ================================================================================ LZ4 filter plugin license @@ -42,4 +42,4 @@ THIS SOFTWARE IS PROVIDED BY THE HDF GROUP AND THE CONTRIBUTORS "AS IS" WITH NO WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED. In no event shall The HDF Group or the Contributors be liable for any damages suffered by the users arising out of the use of this software, even if -advised of the possibility of such damage. \ No newline at end of file +advised of the possibility of such damage. diff --git a/ext/CodecZstdExt/CodecZstdExt.jl b/ext/CodecZstdExt/CodecZstdExt.jl new file mode 100644 index 000000000..ebb62feea --- /dev/null +++ b/ext/CodecZstdExt/CodecZstdExt.jl @@ -0,0 +1,128 @@ +#= +Derived from https://github.com/aparamon/HDF5Plugin-Zstandard, zstd_h5plugin.c +Licensed under Apache License Version 2.0, see licenses/H5Zzstd_LICENSE.txt + +The following license applies to the Julia port. +Copyright (c) 2021 Mark Kittisopikul and Howard Hughes Medical Institute. 
License MIT, see LICENSE.txt +=# +module CodecZstdExt + +using CodecZstd +import CodecZstd.LibZstd +using HDF5.API +import HDF5.Filters: + Filter, filterid, register_filter, filterid, filtername, filter_func, filter_cfunc + +const H5Z_FILTER_ZSTD = API.H5Z_filter_t(32015) +const zstd_name = "Zstandard compression: http://www.zstd.net" + +export H5Z_filter_zstd, H5Z_FILTER_ZSTD, ZstdFilter + +# cd_values First optional value is the compressor aggression +# Default is CodecZstd.LibZstd.ZSTD_CLEVEL_DEFAULT +function H5Z_filter_zstd( + flags::Cuint, + cd_nelmts::Csize_t, + cd_values::Ptr{Cuint}, + nbytes::Csize_t, + buf_size::Ptr{Csize_t}, + buf::Ptr{Ptr{Cvoid}} +)::Csize_t + inbuf = unsafe_load(buf) + outbuf = C_NULL + origSize = nbytes + ret_value = Csize_t(0) + + try + if flags & API.H5Z_FLAG_REVERSE != 0 + #decompresssion + + decompSize = LibZstd.ZSTD_getDecompressedSize(inbuf, origSize) + if decompSize == 0 + error("zstd_h5plugin: Cannot retrieve decompressed chunk size") + end + outbuf = Libc.malloc(decompSize) + if outbuf == C_NULL + error( + "zstd_h5plugin: Cannot allocate memory for outbuf during decompression." + ) + end + decompSize = LibZstd.ZSTD_decompress(outbuf, decompSize, inbuf, origSize) + Libc.free(inbuf) + unsafe_store!(buf, outbuf) + outbuf = C_NULL + ret_value = Csize_t(decompSize) + else + # compression + + if cd_nelmts > 0 + aggression = Cint(unsafe_load(cd_values)) + else + aggression = CodecZstd.LibZstd.ZSTD_CLEVEL_DEFAULT + end + + if aggression < 1 + aggression = 1 # ZSTD_minCLevel() + elseif aggression > LibZstd.ZSTD_maxCLevel() + aggression = LibZstd.ZSTD_maxCLevel() + end + + compSize = LibZstd.ZSTD_compressBound(origSize) + outbuf = Libc.malloc(compSize) + if outbuf == C_NULL + error( + "zstd_h5plugin: Cannot allocate memory for outbuf during compression." 
+ ) + end + + compSize = LibZstd.ZSTD_compress(outbuf, compSize, inbuf, origSize, aggression) + + Libc.free(unsafe_load(buf)) + unsafe_store!(buf, outbuf) + unsafe_store!(buf_size, compSize) + outbuf = C_NULL + ret_value = compSize + end + catch e + # "In the case of failure, the return value is 0 (zero) and all pointer arguments are left unchanged." + ret_value = Csize_t(0) + # Output Julia error via async so we do not task switch during callback + @async @error "H5Zzstd Non-Fatal ERROR: " exception = (e, catch_backtrace()) + finally + if outbuf != C_NULL + Libc.free(outbuf) + end + end # try catch finally + return Csize_t(ret_value) +end + +# Filters Module + +""" + ZstdFilter(clevel) + +Zstandard compression filter. `clevel` determines the compression level. + +# External Links +* [Zstandard HDF5 Filter ID 32015](https://portal.hdfgroup.org/display/support/Filters#Filters-32015) +* [Zstandard HDF5 Plugin Repository (C code)](https://github.com/aparamon/HDF5Plugin-Zstandard) +""" +struct ZstdFilter <: Filter + clevel::Cuint +end +ZstdFilter() = ZstdFilter(CodecZstd.LibZstd.ZSTD_CLEVEL_DEFAULT) + +filterid(::Type{ZstdFilter}) = H5Z_FILTER_ZSTD +filtername(::Type{ZstdFilter}) = zstd_name +filter_func(::Type{ZstdFilter}) = H5Z_filter_zstd +filter_cfunc(::Type{ZstdFilter}) = @cfunction( + H5Z_filter_zstd, + Csize_t, + (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}}) +) + +function __init__() + register_filter(ZstdFilter) +end + +end # module H5Zzstd diff --git a/filters/H5Zzstd/LICENSE.txt b/ext/CodecZstdExt/LICENSE.txt similarity index 100% rename from filters/H5Zzstd/LICENSE.txt rename to ext/CodecZstdExt/LICENSE.txt diff --git a/ext/CodecZstdExt/Project.toml.old b/ext/CodecZstdExt/Project.toml.old new file mode 100644 index 000000000..2f4c1256c --- /dev/null +++ b/ext/CodecZstdExt/Project.toml.old @@ -0,0 +1,12 @@ +name = "H5Zzstd" +uuid = "f6f2d980-1ec6-471c-a70d-0270e22f1103" +version = "0.1.2" + +[deps] +CodecZstd = 
"6b39b394-51ab-5f42-8807-6242bab2b4c2" +HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" + +[compat] +HDF5 = "0.17" +CodecZstd = "0.7, 0.8" +julia = "1.3" diff --git a/ext/CodecZstdExt/README.md b/ext/CodecZstdExt/README.md new file mode 100644 index 000000000..e4e54e8cc --- /dev/null +++ b/ext/CodecZstdExt/README.md @@ -0,0 +1,8 @@ +# CodecZstdExt + +Implements the Zstd filter for [HDF5.jl](https://github.com/JuliaIO/HDF5.jl) in Julia. +See the [documentation](https://juliaio.github.io/HDF5.jl/stable/filters/#H5Zzstd.jl) + +This implements [HDF5 ZStandard Filter 32015](https://portal.hdfgroup.org/display/support/Filters#Filters-32015) + +The contents of this package is derived from H5Zzstd.jl diff --git a/filters/H5Zzstd/THIRDPARTY.txt b/ext/CodecZstdExt/THIRDPARTY.txt similarity index 100% rename from filters/H5Zzstd/THIRDPARTY.txt rename to ext/CodecZstdExt/THIRDPARTY.txt diff --git a/ext/bitshuffle_jll_ext/LICENSE.txt b/ext/bitshuffle_jll_ext/LICENSE.txt new file mode 100644 index 000000000..281fd00a6 --- /dev/null +++ b/ext/bitshuffle_jll_ext/LICENSE.txt @@ -0,0 +1,23 @@ +H5Zbitshuffle - Julia wrapping of bitshuffle HDF5 Filter for improving +compression of typed binary data. + +Copyright (c) Australian Nuclear Science and Technology Organisation +2022 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/ext/bitshuffle_jll_ext/README.md b/ext/bitshuffle_jll_ext/README.md new file mode 100644 index 000000000..201f1eca9 --- /dev/null +++ b/ext/bitshuffle_jll_ext/README.md @@ -0,0 +1,6 @@ +# bitshuffle_jll_ext + +Implements the bitshuffle filter for [HDF5.jl](https://github.com/JuliaIO/HDF5.jl) in Julia, +with optional integrated lz4 and zstd (de)compression. + +This implements [HDF5 filter ID 32008](https://portal.hdfgroup.org/display/support/Filters#Filters-32008) diff --git a/ext/bitshuffle_jll_ext/bitshuffle_jll_ext.jl b/ext/bitshuffle_jll_ext/bitshuffle_jll_ext.jl new file mode 100644 index 000000000..e391ec1bb --- /dev/null +++ b/ext/bitshuffle_jll_ext/bitshuffle_jll_ext.jl @@ -0,0 +1,384 @@ +#== +Julia code wrapping the bitshuffle filter for HDF5. A rough translation of +bshuf_h5filter.c by Kiyoshi Masui, see +https://github.com/kiyo-masui/bitshuffle. +==# +""" +The bitshuffle filter for HDF5. See https://portal.hdfgroup.org/display/support/Filters#Filters-32008 +and https://github.com/kiyo-masui/bitshuffle for details. 
+"""
+module bitshuffle_jll_ext
+
+using bitshuffle_jll
+
+using HDF5.API
+import HDF5.Filters:
+    Filter,
+    filterid,
+    register_filter,
+    filtername,
+    filter_func,
+    filter_cfunc,
+    set_local_func,
+    set_local_cfunc
+
+export BSHUF_H5_COMPRESS_LZ4,
+    BSHUF_H5_COMPRESS_ZSTD, BitshuffleFilter, H5Z_filter_bitshuffle
+
+# From bshuf_h5filter.h
+
+const BSHUF_H5_COMPRESS_LZ4 = 2
+const BSHUF_H5_COMPRESS_ZSTD = 3
+const H5Z_FILTER_BITSHUFFLE = API.H5Z_filter_t(32008)
+
+const BSHUF_VERSION_MAJOR = 0
+const BSHUF_VERSION_MINOR = 4
+const BSHUF_VERSION_POINT = 2
+
+const bitshuffle_name = "HDF5 bitshuffle filter; see https://github.com/kiyo-masui/bitshuffle"
+
+# `set_local` callback: validate the parameters supplied for this filter on `dcpl` and
+# store the bitshuffle version and the element size of `htype` in the first three slots.
+function bitshuffle_set_local(dcpl::API.hid_t, htype::API.hid_t, space::API.hid_t)
+
+    # Sanity check of provided values and set element size
+
+    bs_flags = Ref{Cuint}()
+    bs_values = Vector{Cuint}(undef, 8)
+    bs_nelements = Ref{Csize_t}(length(bs_values))
+
+    API.h5p_get_filter_by_id(
+        dcpl, H5Z_FILTER_BITSHUFFLE, bs_flags, bs_nelements, bs_values, 0, C_NULL, C_NULL
+    )
+
+    @debug "Initial filter info" bs_flags bs_values bs_nelements
+
+    flags = bs_flags[]
+
+    # set values
+
+    bs_values[1] = BSHUF_VERSION_MAJOR
+    bs_values[2] = BSHUF_VERSION_MINOR
+
+    elem_size = API.h5t_get_size(htype)
+
+    @debug "Element size for $htype reported as $elem_size"
+
+    if elem_size <= 0
+        return API.herr_t(-1)
+    end
+
+    bs_values[3] = elem_size
+    nelements = bs_nelements[]
+
+    # check user-supplied values
+
+    if nelements > 3
+        if bs_values[4] % 8 != 0 # block size must be a multiple of 8 (Cuint is never < 0)
+            return API.herr_t(-1)
+        end
+    end
+
+    if nelements > 4
+        if !(bs_values[5] in (0, BSHUF_H5_COMPRESS_LZ4, BSHUF_H5_COMPRESS_ZSTD))
+            return API.herr_t(-1)
+        end
+    end
+
+    @debug "Final values" bs_values
+
+    API.h5p_modify_filter(dcpl, H5Z_FILTER_BITSHUFFLE, flags, nelements, bs_values)
+
+    return API.herr_t(1)
+end
+
+# Filter callback registered with HDF5. When `H5Z_FLAG_REVERSE` is set in `flags` the
+# chunk in `buf` is unshuffled (and decompressed), otherwise it is shuffled (and
+# compressed). On success the input buffer is freed, `buf` points at a newly malloc'd
+# buffer and the number of output bytes is returned; on any failure 0 is returned.
+function H5Z_filter_bitshuffle(
+    flags::Cuint,
+    cd_nelmts::Csize_t,
+    cd_values::Ptr{Cuint},
+    nbytes::Csize_t,
+    buf_size::Ptr{Csize_t},
+    buf::Ptr{Ptr{Cvoid}}
+)::Csize_t
+    in_buf = unsafe_load(buf) #in_buf is *void
+    out_buf = C_NULL
+    nbytes_out = 0
+    block_size = 0
+
+    try #mop up errors at end
+        @debug "nelmts" cd_nelmts
+
+        if cd_nelmts < 3
+            error("bitshuffle_h5plugin: Not enough elements provided to bitshuffle filter")
+        end
+
+        # Get needed information; never read parameter slots beyond `cd_nelmts`
+        major = unsafe_load(cd_values, 1)
+        minor = unsafe_load(cd_values, 2)
+        elem_size = unsafe_load(cd_values, 3)
+        compress_flag = cd_nelmts > 4 ? unsafe_load(cd_values, 5) : Cuint(0)
+        comp_lvl = cd_nelmts > 5 ? unsafe_load(cd_values, 6) : Cuint(0)
+
+        if cd_nelmts > 3
+            block_size = unsafe_load(cd_values, 4)
+        end
+
+        @debug "Major,minor:" major minor
+        @debug "element size, compress_level, compress_flag" elem_size comp_lvl compress_flag
+
+        if block_size == 0
+            block_size = ccall(
+                (:bshuf_default_block_size, libbitshuffle), Csize_t, (Csize_t,), elem_size
+            )
+        end
+
+        # Work out buffer sizes
+        if cd_nelmts > 4 &&
+            (compress_flag in (BSHUF_H5_COMPRESS_LZ4, BSHUF_H5_COMPRESS_ZSTD))
+
+            # Use compression: the chunk carries a 12-byte header (size, block size)
+            if (flags & API.H5Z_FLAG_REVERSE) != 0 # unshuffle and decompress
+
+                # First 8 bytes is number of uncompressed bytes
+                nbytes_uncomp = ccall(
+                    (:bshuf_read_uint64_BE, libbitshuffle), UInt64, (Ptr{Cvoid},), in_buf
+                )
+                # Next 4 bytes are the block size
+
+                block_size =
+                    ccall(
+                        (:bshuf_read_uint32_BE, libbitshuffle),
+                        UInt32,
+                        (Ptr{Cvoid},),
+                        in_buf + 8
+                    ) ÷ elem_size
+
+                in_buf += 12
+                buf_size_out = nbytes_uncomp
+
+            else #shuffle and compress
+                nbytes_uncomp = nbytes
+                if compress_flag == BSHUF_H5_COMPRESS_LZ4
+                    buf_size_out =
+                        ccall(
+                            (:bshuf_compress_lz4_bound, libbitshuffle),
+                            Csize_t,
+                            (Csize_t, Csize_t, Csize_t),
+                            nbytes_uncomp ÷ elem_size,
+                            elem_size,
+                            block_size
+                        ) + 12
+                elseif compress_flag == BSHUF_H5_COMPRESS_ZSTD
+                    buf_size_out =
+                        ccall(
+                            (:bshuf_compress_zstd_bound, libbitshuffle),
+                            Csize_t,
+                            (Csize_t, Csize_t, Csize_t),
+                            nbytes_uncomp ÷ elem_size,
+                            elem_size,
+                            block_size
+                        ) + 12
+                end
+            end
+
+        else # No compression required
+            nbytes_uncomp = nbytes
+            buf_size_out = nbytes
+        end
+
+        if nbytes_uncomp % elem_size != 0
+            error(
+                "bitshuffle_h5plugin: Uncompressed size $nbytes_uncomp is not a multiple of $elem_size"
+            )
+        end
+
+        size = nbytes_uncomp ÷ elem_size
+        out_buf = Libc.malloc(buf_size_out)
+        if out_buf == C_NULL
+            error(
+                "bitshuffle_h5plugin: Cannot allocate memory for outbuf during decompression"
+            )
+        end
+
+        # Now perform the (un)shuffle and the optional (de)compression
+        if cd_nelmts > 4 &&
+            (compress_flag in (BSHUF_H5_COMPRESS_LZ4, BSHUF_H5_COMPRESS_ZSTD))
+            if flags & API.H5Z_FLAG_REVERSE != 0 #unshuffle and decompress
+                if compress_flag == BSHUF_H5_COMPRESS_LZ4
+                    err = ccall(
+                        (:bshuf_decompress_lz4, libbitshuffle),
+                        Int64,
+                        (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t),
+                        in_buf,
+                        out_buf,
+                        size,
+                        elem_size,
+                        block_size
+                    )
+                elseif compress_flag == BSHUF_H5_COMPRESS_ZSTD
+                    err = ccall(
+                        (:bshuf_decompress_zstd, libbitshuffle),
+                        Int64,
+                        (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t),
+                        in_buf,
+                        out_buf,
+                        size,
+                        elem_size,
+                        block_size
+                    )
+                end
+                nbytes_out = nbytes_uncomp
+
+            else #shuffle and compress
+                ccall(
+                    (:bshuf_write_uint64_BE, libbitshuffle),
+                    Cvoid,
+                    (Ptr{Cvoid}, UInt64),
+                    out_buf,
+                    nbytes_uncomp
+                )
+                ccall(
+                    (:bshuf_write_uint32_BE, libbitshuffle),
+                    Cvoid,
+                    (Ptr{Cvoid}, UInt32),
+                    out_buf + 8,
+                    block_size * elem_size
+                )
+
+                if compress_flag == BSHUF_H5_COMPRESS_LZ4
+                    err = ccall(
+                        (:bshuf_compress_lz4, libbitshuffle),
+                        Int64,
+                        (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t),
+                        in_buf,
+                        out_buf + 12,
+                        size,
+                        elem_size,
+                        block_size
+                    )
+                else
+                    err = ccall(
+                        (:bshuf_compress_zstd, libbitshuffle),
+                        Int64,
+                        (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t, Cint),
+                        in_buf,
+                        out_buf + 12,
+                        size,
+                        elem_size,
+                        block_size,
+                        comp_lvl % Cint # Zstd level; was read from cd_values but never passed
+                    )
+                end
+                nbytes_out = err + 12
+            end
+        else # just the shuffle thanks
+            if flags & API.H5Z_FLAG_REVERSE != 0
+                err = ccall(
+                    (:bshuf_bitunshuffle, libbitshuffle),
+                    Int64,
+                    (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t),
+                    in_buf,
+                    out_buf,
+                    size,
+                    elem_size,
+                    block_size
+                )
+            else
+                err = ccall(
+                    (:bshuf_bitshuffle, libbitshuffle),
+                    Int64,
+                    (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t),
+                    in_buf,
+                    out_buf,
+                    size,
+                    elem_size,
+                    block_size
+                )
+            end
+
+            nbytes_out = nbytes
+        end
+
+        # And wrap it up
+
+        if err < 0
+            error("h5plugin_bitshuffle: Error in bitshuffle with code $err")
+        end
+
+        Libc.free(unsafe_load(buf))
+        unsafe_store!(buf, out_buf)
+        unsafe_store!(buf_size, Csize_t(buf_size_out))
+        out_buf = C_NULL
+
+    catch e
+
+        # On failure, return 0 and change no arguments
+
+        nbytes_out = Csize_t(0)
+        @error "Non-fatal H5 bitshuffle plugin error: " e
+        display(stacktrace(catch_backtrace()))
+
+    finally
+        if out_buf != C_NULL
+            Libc.free(out_buf)
+        end
+    end
+
+    return Csize_t(nbytes_out)
+end
+
+# Filter registration
+
+# All information for the filter
+
+struct BitshuffleFilter <: Filter
+    major::Cuint
+    minor::Cuint
+    typesize::Cuint
+    blocksize::Cuint
+    compression::Cuint
+    comp_level::Cuint #Zstd only
+end
+
+"""
+    BitshuffleFilter(; blocksize=0, compressor=:none, comp_level=0)
+
+The Bitshuffle filter can optionally include compression :lz4 or :zstd. For :zstd
+comp_level can be provided. This is ignored for :lz4 compression. If `blocksize`
+is zero the default bitshuffle blocksize is used.
+"""
+function BitshuffleFilter(; blocksize=0, compressor=:none, comp_level=0)
+    compressor in (:lz4, :zstd, :none) ||
+        throw(ArgumentError("Invalid bitshuffle compression $compressor"))
+    compcode = 0
+    if compressor == :lz4
+        compcode = BSHUF_H5_COMPRESS_LZ4
+    elseif compressor == :zstd
+        compcode = BSHUF_H5_COMPRESS_ZSTD
+    end
+    BitshuffleFilter(
+        BSHUF_VERSION_MAJOR, BSHUF_VERSION_MINOR, 0, blocksize, compcode, comp_level
+    )
+end
+
+filterid(::Type{BitshuffleFilter}) = H5Z_FILTER_BITSHUFFLE
+filtername(::Type{BitshuffleFilter}) = bitshuffle_name
+set_local_func(::Type{BitshuffleFilter}) = bitshuffle_set_local
+set_local_cfunc(::Type{BitshuffleFilter}) =
+    @cfunction(bitshuffle_set_local, API.herr_t, (API.hid_t, API.hid_t, API.hid_t))
+filter_func(::Type{BitshuffleFilter}) = H5Z_filter_bitshuffle
+filter_cfunc(::Type{BitshuffleFilter}) = @cfunction(
+    H5Z_filter_bitshuffle,
+    Csize_t,
+    (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}})
+)
+
+function __init__()
+    register_filter(BitshuffleFilter)
+end
+
+end # module bitshuffle_jll_ext
diff --git a/filters/H5Zbitshuffle/src/.H5Zbitshuffle.jl.swp b/filters/H5Zbitshuffle/src/.H5Zbitshuffle.jl.swp
new file mode 100644
index 0000000000000000000000000000000000000000..36d4c6c5cf862c3b31507a32181f317ae4c9c1f8
GIT binary patch
literal 12288
zcmeI2J8#oa6o7AG;!!|676x=l7N}A?q(rJB1=P?+P1To@v_lyzC%#T@>)285b=4p!
zD`KLI46wk=FJOa#g@J*AB7OmgKY)PaN64d2)pi3NOP{P9A0MCdIkpt#R+6iQ1v;su z2yEvFNv+*8x6@@r?jSVg(_YUm83F+8vE-c+P_D$Rl3aZMwYjIsQ*rqx=olmPK zwTGjrDvTp5L9uJ}cG%(D7E6wrAQ8JObk@QmHv8(jL`ITYW z7)JwW01co4G=K)s02)98Xy89IAe$rPAI=(1-t}w z;0mw{y?zE>0M7xh_b0$z;4*+48bAYR01co4G=K)sz>y3v+n{mUa17UCgj5|{NZMVW zso8pi(QGGjsAO2)sjXX<#F?FxkC%huiZRo9Z2n+D_8=ob-xVJb-cOuq;}f*yL343 zuP2VKkQ>PAoa}!~1_vEdA~JVqp`%IZn&>ur@rr>Ah!hBlr~D>;&NrzfsSJ62wG?xPsd?>c^&*3q2n*evpKbetU!j z?>x^)_Xvr_Vgy#W)4G&Rkx|temvy(IRGo&p!Q0MwLl-VryC+c7X;Fhoom)a7qL&x2ZU-nrd*-wDh(r+@^O}4M&WJ`|*iXGCAq@ nn9at?f&=er$D|UjT6GL4tqWRV%!b$bCNn6vr9(+Pn> literal 0 HcmV?d00001 diff --git a/filters/H5Zbitshuffle/src/H5Zbitshuffle.jl b/filters/H5Zbitshuffle/src/H5Zbitshuffle.jl index ca5dc50de..429c146cf 100644 --- a/filters/H5Zbitshuffle/src/H5Zbitshuffle.jl +++ b/filters/H5Zbitshuffle/src/H5Zbitshuffle.jl @@ -1,7 +1,5 @@ #== -Julia code wrapping the bitshuffle filter for HDF5. A rough translation of -bshuf_h5filter.c by Kiyoshi Masui, see -https://github.com/kiyo-masui/bitshuffle. +Most of the code has been moved into ==# """ The bitshuffle filter for HDF5. 
See https://portal.hdfgroup.org/display/support/Filters#Filters-32008 @@ -11,6 +9,7 @@ module H5Zbitshuffle using bitshuffle_jll +using HDF5: HDF5 using HDF5.API import HDF5.Filters: Filter, @@ -25,360 +24,21 @@ import HDF5.Filters: export BSHUF_H5_COMPRESS_LZ4, BSHUF_H5_COMPRESS_ZSTD, BitshuffleFilter, H5Z_filter_bitshuffle -# From bshuf_h5filter.h +const bitshuffle_jll_ext = Base.get_extension(HDF5, :bitshuffle_jll_ext) -const BSHUF_H5_COMPRESS_LZ4 = 2 -const BSHUF_H5_COMPRESS_ZSTD = 3 -const H5Z_FILTER_BITSHUFFLE = API.H5Z_filter_t(32008) +const BSHUF_H5_COMPRESS_LZ4 = bitshuffle_jll_ext.BSHUF_H5_COMPRESS_LZ4 +const BSHUF_H5_COMPRESS_ZSTD = bitshuffle_jll_ext.BSHUF_H5_COMPRESS_ZSTD +const BitshuffleFilter = bitshuffle_jll_ext.BitshuffleFilter +const H5Z_filter_bitshuffle = bitshuffle_jll_ext.H5Z_filter_bitshuffle -const BSHUF_VERSION_MAJOR = 0 -const BSHUF_VERSION_MINOR = 4 -const BSHUF_VERSION_POINT = 2 +const BSHUF_H5_COMPRESS_LZ4 = bitshuffle_jll_ext.BSHUF_H5_COMPRESS_LZ4 +const BSHUF_H5_COMPRESS_ZSTD = bitshuffle_jll_ext.BSHUF_H5_COMPRESS_ZSTD +const H5Z_FILTER_BITSHUFFLE = bitshuffle_jll_ext.H5Z_FILTER_BITSHUFFLE -const bitshuffle_name = "HDF5 bitshuffle filter; see https://github.com/kiyo-masui/bitshuffle" +const BSHUF_VERSION_MAJOR = bitshuffle_jll_ext.BSHUF_VERSION_MAJOR +const BSHUF_VERSION_MINOR = bitshuffle_jll_ext.BSHUF_VERSION_MINOR +const BSHUF_VERSION_POINT = bitshuffle_jll_ext.BSHUF_VERSION_POINT -# Set filter arguments - -function bitshuffle_set_local(dcpl::API.hid_t, htype::API.hid_t, space::API.hid_t) - - # Sanity check of provided values and set element size - - bs_flags = Ref{Cuint}() - bs_values = Vector{Cuint}(undef, 8) - bs_nelements = Ref{Csize_t}(length(bs_values)) - - API.h5p_get_filter_by_id( - dcpl, H5Z_FILTER_BITSHUFFLE, bs_flags, bs_nelements, bs_values, 0, C_NULL, C_NULL - ) - - @debug "Initial filter info" bs_flags bs_values bs_nelements - - flags = bs_flags[] - - # set values - - bs_values[1] = BSHUF_VERSION_MAJOR - bs_values[2] 
= BSHUF_VERSION_MINOR - - elem_size = API.h5t_get_size(htype) - - @debug "Element size for $htype reported as $elem_size" - - if elem_size <= 0 - return API.herr_t(-1) - end - - bs_values[3] = elem_size - nelements = bs_nelements[] - - # check user-supplied values - - if nelements > 3 - if bs_values[4] % 8 != 0 || bs_values[4] < 0 - return API.herr_t(-1) - end - end - - if nelements > 4 - if !(bs_values[5] in (0, BSHUF_H5_COMPRESS_LZ4, BSHUF_H5_COMPRESS_ZSTD)) - return API.herr_t(-1) - end - end - - @debug "Final values" bs_values - - API.h5p_modify_filter(dcpl, H5Z_FILTER_BITSHUFFLE, bs_flags[], nelements, bs_values) - - return API.herr_t(1) -end - -function H5Z_filter_bitshuffle( - flags::Cuint, - cd_nelmts::Csize_t, - cd_values::Ptr{Cuint}, - nbytes::Csize_t, - buf_size::Ptr{Csize_t}, - buf::Ptr{Ptr{Cvoid}} -)::Csize_t - in_buf = unsafe_load(buf) #in_buf is *void - out_buf = C_NULL - nbytes_out = 0 - block_size = 0 - - try #mop up errors at end - @debug "nelmts" cd_nelmts - - if cd_nelmts < 3 - error("bitshuffle_h5plugin: Not enough elements provided to bitshuffle filter") - end - - # Get needed information - - major = unsafe_load(cd_values, 1) - minor = unsafe_load(cd_values, 2) - elem_size = unsafe_load(cd_values, 3) - comp_lvl = unsafe_load(cd_values, 6) - compress_flag = unsafe_load(cd_values, 5) - - if cd_nelmts > 3 - block_size = unsafe_load(cd_values, 4) - end - - @debug "Major,minor:" major minor - @debug "element size, compress_level, compress_flag" elem_size comp_lvl compress_flag - - if block_size == 0 - block_size = ccall( - (:bshuf_default_block_size, libbitshuffle), Csize_t, (Csize_t,), elem_size - ) - end - - # Work out buffer sizes - - if cd_nelmts > 4 && - (compress_flag in (BSHUF_H5_COMPRESS_LZ4, BSHUF_H5_COMPRESS_ZSTD)) - - # Use compression - - if (flags & API.H5Z_FLAG_REVERSE) != 0 # unshuffle and decompress - - # First 8 bytes is number of uncompressed bytes - nbytes_uncomp = ccall( - (:bshuf_read_uint64_BE, libbitshuffle), UInt64, 
(Ptr{Cvoid},), in_buf - ) - # Next 4 bytes are the block size - - block_size = - ccall( - (:bshuf_read_uint32_BE, libbitshuffle), - UInt32, - (Ptr{Cvoid},), - in_buf + 8 - ) ÷ elem_size - - in_buf += 12 - buf_size_out = nbytes_uncomp - - else #shuffle and compress - nbytes_uncomp = nbytes - if compress_flag == BSHUF_H5_COMPRESS_LZ4 - buf_size_out = - ccall( - (:bshuf_compress_lz4_bound, libbitshuffle), - Csize_t, - (Csize_t, Csize_t, Csize_t), - nbytes_uncomp ÷ elem_size, - elem_size, - block_size - ) + 12 - elseif compress_flag == BSHUF_H5_COMPRESS_ZSTD - buf_size_out = - ccall( - (:bshuf_compress_zstd_bound, libbitshuffle), - Csize_t, - (Csize_t, Csize_t, Csize_t), - nbytes_uncomp ÷ elem_size, - elem_size, - block_size - ) + 12 - end - end - - else # No compression required - nbytes_uncomp = nbytes - buf_size_out = nbytes - end - - if nbytes_uncomp % elem_size != 0 - error( - "bitshuffle_h5plugin: Uncompressed size $nbytes_uncomp is not a multiple of $elem_size" - ) - end - - size = nbytes_uncomp ÷ elem_size - out_buf = Libc.malloc(buf_size_out) - if out_buf == C_NULL - error( - "bitshuffle_h5plugin: Cannot allocate memory for outbuf during decompression" - ) - end - - # Now perform the decompression - - if cd_nelmts > 4 && - (compress_flag in (BSHUF_H5_COMPRESS_LZ4, BSHUF_H5_COMPRESS_ZSTD)) - if flags & API.H5Z_FLAG_REVERSE != 0 #unshuffle and decompress - if compress_flag == BSHUF_H5_COMPRESS_LZ4 - err = ccall( - (:bshuf_decompress_lz4, libbitshuffle), - Int64, - (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t), - in_buf, - out_buf, - size, - elem_size, - block_size - ) - elseif compress_flag == BSHUF_H5_COMPRESS_ZSTD - err = ccall( - (:bshuf_decompress_zstd, libbitshuffle), - Int64, - (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t), - in_buf, - out_buf, - size, - elem_size, - block_size - ) - end - nbytes_out = nbytes_uncomp - - else #shuffle and compress - ccall( - (:bshuf_write_uint64_BE, libbitshuffle), - Cvoid, - (Ptr{Cvoid}, UInt64), - out_buf, - 
nbytes_uncomp - ) - ccall( - (:bshuf_write_uint32_BE, libbitshuffle), - Cvoid, - (Ptr{Cvoid}, UInt32), - out_buf + 8, - block_size * elem_size - ) - - if compress_flag == BSHUF_H5_COMPRESS_LZ4 - err = ccall( - (:bshuf_compress_lz4, libbitshuffle), - Int64, - (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t), - in_buf, - out_buf + 12, - size, - elem_size, - block_size - ) - else - err = ccall( - (:bshuf_compress_zstd, libbitshuffle), - Int64, - (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t), - in_buf, - out_buf + 12, - size, - elem_size, - block_size - ) - end - - nbytes_out = err + 12 - end - else # just the shuffle thanks - if flags & API.H5Z_FLAG_REVERSE != 0 - err = ccall( - (:bshuf_bitunshuffle, libbitshuffle), - Int64, - (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t), - in_buf, - out_buf, - size, - elem_size, - block_size - ) - else - err = ccall( - (:bshuf_bitshuffle, libbitshuffle), - Int64, - (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t), - in_buf, - out_buf, - size, - elem_size, - block_size - ) - end - - nbytes_out = nbytes - end - - # And wrap it up - - if err < 0 - error("h5plugin_bitshuffle: Error in bitshuffle with code $err") - end - - Libc.free(unsafe_load(buf)) - unsafe_store!(buf, out_buf) - unsafe_store!(buf_size, Csize_t(buf_size_out)) - out_buf = C_NULL - - catch e - - # On failure, return 0 and change no arguments - - nbytes_out = Csize_t(0) - @error "Non-fatal H5 bitshuffle plugin error: " e - display(stacktrace(catch_backtrace())) - - finally - if out_buf != C_NULL - Libc.free(out_buf) - end - end - - return Csize_t(nbytes_out) -end - -# Filter registration - -# All information for the filter - -struct BitshuffleFilter <: Filter - major::Cuint - minor::Cuint - typesize::Cuint - blocksize::Cuint - compression::Cuint - comp_level::Cuint #Zstd only -end - -""" - BitshuffleFilter(blocksize=0,compressor=:none,comp_level=0) - -The Bitshuffle filter can optionally include compression :lz4 or :zstd. 
For :zstd -comp_level can be provided. This is ignored for :lz4 compression. If `blocksize` -is zero the default bitshuffle blocksize is used. -""" -function BitshuffleFilter(; blocksize=0, compressor=:none, comp_level=0) - compressor in (:lz4, :zstd, :none) || - throw(ArgumentError("Invalid bitshuffle compression $compressor")) - compcode = 0 - if compressor == :lz4 - compcode = BSHUF_H5_COMPRESS_LZ4 - elseif compressor == :zstd - compcode = BSHUF_H5_COMPRESS_ZSTD - end - BitshuffleFilter( - BSHUF_VERSION_MAJOR, BSHUF_VERSION_MINOR, 0, blocksize, compcode, comp_level - ) -end - -filterid(::Type{BitshuffleFilter}) = H5Z_FILTER_BITSHUFFLE -filtername(::Type{BitshuffleFilter}) = bitshuffle_name -set_local_func(::Type{BitshuffleFilter}) = bitshuffle_set_local -set_local_cfunc(::Type{BitshuffleFilter}) = - @cfunction(bitshuffle_set_local, API.herr_t, (API.hid_t, API.hid_t, API.hid_t)) -filterfunc(::Type{BitshuffleFilter}) = H5Z_filter_bitshuffle -filter_cfunc(::Type{BitshuffleFilter}) = @cfunction( - H5Z_filter_bitshuffle, - Csize_t, - (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}}) -) - -function __init__() - register_filter(BitshuffleFilter) -end +const bitshuffle_name = bitshuffle_jll_ext.bitshuffle_name end # module diff --git a/filters/H5Zblosc/src/H5Zblosc.jl b/filters/H5Zblosc/src/H5Zblosc.jl index 26b5d1ae4..08c4ff8ac 100644 --- a/filters/H5Zblosc/src/H5Zblosc.jl +++ b/filters/H5Zblosc/src/H5Zblosc.jl @@ -2,6 +2,7 @@ module H5Zblosc # port of https://github.com/Blosc/c-blosc/blob/3a668dcc9f61ad22b5c0a0ab45fe8dad387277fd/hdf5/blosc_filter.c (copyright 2010 Francesc Alted, license: MIT/expat) import Blosc +using HDF5: HDF5 using HDF5.API import HDF5.Filters: Filter, FilterPipeline import HDF5.Filters: @@ -19,203 +20,13 @@ export H5Z_FILTER_BLOSC, blosc_filter, BloscFilter # Import Blosc shuffle constants import Blosc: NOSHUFFLE, SHUFFLE, BITSHUFFLE -const H5Z_FILTER_BLOSC = API.H5Z_filter_t(32001) # Filter ID registered with the HDF Group for 
Blosc -const FILTER_BLOSC_VERSION = 2 -const blosc_name = "blosc" +const BloscExt = Base.get_extension(HDF5, :BloscExt) -function blosc_set_local(dcpl::API.hid_t, htype::API.hid_t, space::API.hid_t) - blosc_flags = Ref{Cuint}() - blosc_values = Vector{Cuint}(undef, 8) - blosc_nelements = Ref{Csize_t}(length(blosc_values)) - blosc_chunkdims = Vector{API.hsize_t}(undef, 32) +const blosc_filter = BloscExt.blosc_filter +const BloscFilter = BloscExt.BloscFilter - API.h5p_get_filter_by_id( - dcpl, - H5Z_FILTER_BLOSC, - blosc_flags, - blosc_nelements, - blosc_values, - 0, - C_NULL, - C_NULL - ) - flags = blosc_flags[] - - nelements = max(blosc_nelements[], 4) # First 4 slots reserved - - # Set Blosc info in first two slots - blosc_values[1] = FILTER_BLOSC_VERSION - blosc_values[2] = Blosc.VERSION_FORMAT - - ndims = API.h5p_get_chunk(dcpl, 32, blosc_chunkdims) - chunksize = prod(resize!(blosc_chunkdims, ndims)) - if ndims < 0 || ndims > 32 || chunksize > Blosc.MAX_BUFFERSIZE - return API.herr_t(-1) - end - - htypesize = API.h5t_get_size(htype) - if API.h5t_get_class(htype) == API.H5T_ARRAY - hsuper = API.h5t_get_super(htype) - basetypesize = API.h5t_get_size(hsuper) - API.h5t_close(hsuper) - else - basetypesize = htypesize - end - - # Limit large typesizes (they are pretty inefficient to shuffle - # and, in addition, Blosc does not handle typesizes larger than - # blocksizes). - if basetypesize > Blosc.MAX_TYPESIZE - basetypesize = 1 - end - blosc_values[3] = basetypesize - blosc_values[4] = chunksize * htypesize # size of the chunk - - API.h5p_modify_filter(dcpl, H5Z_FILTER_BLOSC, flags, nelements, blosc_values) - - return API.herr_t(1) -end - -function blosc_filter( - flags::Cuint, - cd_nelmts::Csize_t, - cd_values::Ptr{Cuint}, - nbytes::Csize_t, - buf_size::Ptr{Csize_t}, - buf::Ptr{Ptr{Cvoid}} -) - typesize = unsafe_load(cd_values, 3) # The datatype size - outbuf_size = unsafe_load(cd_values, 4) - # Compression level: - clevel = cd_nelmts >= 5 ? 
unsafe_load(cd_values, 5) : Cuint(5) - # Do shuffle: - doshuffle = cd_nelmts >= 6 ? unsafe_load(cd_values, 6) : SHUFFLE - - if (flags & API.H5Z_FLAG_REVERSE) == 0 # compressing - # Allocate an output buffer exactly as long as the input data; if - # the result is larger, we simply return 0. The filter is flagged - # as optional, so HDF5 marks the chunk as uncompressed and proceeds. - outbuf_size = unsafe_load(buf_size) - outbuf = Libc.malloc(outbuf_size) - outbuf == C_NULL && return Csize_t(0) - - compname = if cd_nelmts >= 7 - compcode = unsafe_load(cd_values, 7) - Blosc.compname(compcode) - else - "blosclz" - end - Blosc.set_compressor(compname) - status = Blosc.blosc_compress( - clevel, doshuffle, typesize, nbytes, unsafe_load(buf), outbuf, nbytes - ) - status < 0 && (Libc.free(outbuf); return Csize_t(0)) - else # decompressing - # Extract the exact outbuf_size from the buffer header. - # - # NOTE: the guess value got from "cd_values" corresponds to the - # uncompressed chunk size but it should not be used in a general - # cases since other filters in the pipeline can modify the buffer - # size. - in = unsafe_load(buf) - # See https://github.com/JuliaLang/julia/issues/43402 - # Resolved in https://github.com/JuliaLang/julia/pull/43408 - outbuf_size, cbytes, blocksize = Blosc.cbuffer_sizes(in) - outbuf = Libc.malloc(outbuf_size) - outbuf == C_NULL && return Csize_t(0) - status = Blosc.blosc_decompress(in, outbuf, outbuf_size) - status <= 0 && (Libc.free(outbuf); return Csize_t(0)) - end - - if status != 0 - Libc.free(unsafe_load(buf)) - unsafe_store!(buf, outbuf) - unsafe_store!(buf_size, outbuf_size) - return Csize_t(status) # size of compressed/decompressed data - end - Libc.free(outbuf) - return Csize_t(0) -end - -""" - BloscFilter(;level=5, shuffle=true, compressor="blosclz") - -The Blosc compression filter, using [Blosc.jl](https://github.com/JuliaIO/Blosc.jl). 
Options: - - - `level`: compression level - - `shuffle`: whether to shuffle data before compressing (this option should be used instead of the [`Shuffle`](@ref) filter) - - `compressor`: the compression algorithm. Call `Blosc.compressors()` for the available compressors. - -# External links -* [What Is Blosc?](https://www.blosc.org/pages/blosc-in-depth/) -* [Blosc HDF5 Filter ID 32001](https://portal.hdfgroup.org/display/support/Filters#Filters-32001) -* [Blosc HDF5 Plugin Repository (C code)](https://github.com/Blosc/hdf5-blosc) -""" -struct BloscFilter <: Filter - blosc_version::Cuint - version_format::Cuint - typesize::Cuint - bufsize::Cuint - level::Cuint - shuffle::Cuint - compcode::Cuint -end - -function BloscFilter(; level=5, shuffle=SHUFFLE, compressor="blosclz") - Blosc.isvalidshuffle(shuffle) || throw(ArgumentError("invalid blosc shuffle $shuffle")) - compcode = Blosc.compcode(compressor) - BloscFilter(0, 0, 0, 0, level, shuffle, compcode) -end - -filterid(::Type{BloscFilter}) = H5Z_FILTER_BLOSC -filtername(::Type{BloscFilter}) = blosc_name -set_local_func(::Type{BloscFilter}) = blosc_set_local -set_local_cfunc(::Type{BloscFilter}) = - @cfunction(blosc_set_local, API.herr_t, (API.hid_t, API.hid_t, API.hid_t)) -filter_func(::Type{BloscFilter}) = blosc_filter -filter_cfunc(::Type{BloscFilter}) = @cfunction( - blosc_filter, - Csize_t, - (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}}) -) - -function Base.show(io::IO, blosc::BloscFilter) - print( - io, - BloscFilter, - "(level=", - Int(blosc.level), - ",shuffle=", - blosc.shuffle == NOSHUFFLE ? "NOSHUFFLE" : - blosc.shuffle == SHUFFLE ? "SHUFFLE" : - blosc.shuffle == BITSHUFFLE ? 
"BITSHUFFLE" : - "UNKNOWN", - ",compressor=", - Blosc.compname(blosc.compcode), - ")" - ) -end - -function Base.push!(f::FilterPipeline, blosc::BloscFilter) - 0 <= blosc.level <= 9 || - throw(ArgumentError("blosc compression $(blosc.level) not in [0,9]")) - Blosc.isvalidshuffle(blosc.shuffle) || - throw(ArgumentError("invalid blosc shuffle $(blosc.shuffle)")) - ref = Ref(blosc) - GC.@preserve ref begin - API.h5p_set_filter( - f.plist, - filterid(BloscFilter), - API.H5Z_FLAG_OPTIONAL, - div(sizeof(BloscFilter), sizeof(Cuint)), - pointer_from_objref(ref) - ) - end - return f -end - -function __init__() - register_filter(BloscFilter) -end +const H5Z_FILTER_BLOSC = BloscExt.H5Z_FILTER_BLOSC +const FILTER_BLOSC_VERSION = BloscExt.FILTER_BLOSC_VERSION +const blosc_name = BloscExt.blosc_name end # module H5Zblosc diff --git a/filters/H5Zbzip2/src/H5Zbzip2.jl b/filters/H5Zbzip2/src/H5Zbzip2.jl index 65bbe7843..458e3af52 100644 --- a/filters/H5Zbzip2/src/H5Zbzip2.jl +++ b/filters/H5Zbzip2/src/H5Zbzip2.jl @@ -1,235 +1,22 @@ #= -The code below has been ported to Julia from the original C source: -https://github.com/nexusformat/HDF5-External-Filter-Plugins/blob/master/BZIP2/src/H5Zbzip2.c -The filter function H5Z_filter_bzip2 was adopted from: -PyTables http://www.pytables.org. -The plugin can be used with the HDF5 library version 1.8.11+ to read HDF5 datasets compressed with bzip2 created by PyTables. -License: licenses/H5Zbzip2_LICENSE.txt - -The following license applies to the Julia port. Copyright (c) 2021 Mark Kittisopikul and Howard Hughes Medical Institute. 
License MIT, see LICENSE.txt =# module H5Zbzip2 -using CodecBzip2 -import CodecBzip2: libbzip2 +using CodecBzip2: libbzip2 +using HDF5: HDF5 + using HDF5.API import HDF5.Filters: Filter, filterid, register_filter, filtername, filter_func, filter_cfunc export H5Z_FILTER_BZIP2, H5Z_filter_bzip2, Bzip2Filter -const H5Z_FILTER_BZIP2 = API.H5Z_filter_t(307) -const bzip2_name = "HDF5 bzip2 filter; see http://www.hdfgroup.org/services/contributions.html" - -function H5Z_filter_bzip2( - flags::Cuint, - cd_nelmts::Csize_t, - cd_values::Ptr{Cuint}, - nbytes::Csize_t, - buf_size::Ptr{Csize_t}, - buf::Ptr{Ptr{Cvoid}} -)::Csize_t - outbuf = C_NULL - outdatalen = Cuint(0) - - # Prepare the output buffer - - try - if flags & API.H5Z_FLAG_REVERSE != 0 - # Decompress - - outbuflen = nbytes * 3 + 1 - outbuf = Libc.malloc(outbuflen) - if outbuf == C_NULL - error("H5Zbzip2: memory allocation failed for bzip2 decompression.") - end - - stream = CodecBzip2.BZStream() - # Just use default malloc and free - stream.bzalloc = C_NULL - stream.bzfree = C_NULL - # BZ2_bzDecompressInit - ret = CodecBzip2.decompress_init!(stream, 0, false) - if ret != CodecBzip2.BZ_OK - errror("H5Zbzip2: bzip2 decompress start failed with error $ret.") - end - - stream.next_out = outbuf - stream.avail_out = outbuflen - stream.next_in = unsafe_load(buf) - stream.avail_in = nbytes - - cont = true - - while cont - # BZ2_bzDecompress - ret = CodecBzip2.decompress!(stream) - if ret < 0 - error("H5Zbzip2: bzip2 decompression failed with error $ret.") - end - cont = ret != CodecBzip2.BZ_STREAM_END - if cont && stream.avail_out == 0 - # Grow the output buffer - newbuflen = outbuflen * 2 - newbuf = Libc.realloc(outbuf, newbuflen) - if newbuf == C_NULL - error("H5Zbzip2: memory allocation failed for bzip2 decompression.") - end - stream.next_out = newbuf + outbuflen - stream.avail_out = outbuflen - outbuf = newbuf - outbuflen = newbuflen - end - end - - outdatalen = stream.total_out_lo32 - # BZ2_bzDecompressEnd - ret = 
CodecBzip2.decompress_end!(stream) - if ret != CodecBzip2.BZ_OK - error("H5Zbzip2: bzip2 compression end failed with error $ret.") - end - else - # Compress data - - # Maybe not the same size as outdatalen - odatalen = Cuint(0) - blockSize100k = 9 - - # Get compression blocksize if present - if cd_nelmts > 0 - blockSize100k = unsafe_load(cd_values) - if blockSize100k < 1 || blockSize100k > 9 - error("H5Zbzip2: Invalid compression blocksize: $blockSize100k") - end - end - - # Prepare the output buffer - outbuflen = nbytes + nbytes ÷ 100 + 600 # worse case (bzip2 docs) - outbuf = Libc.malloc(outbuflen) - @debug "Allocated" outbuflen outbuf - if outbuf == C_NULL - error("H5Zbzip2: Memory allocation failed for bzip2 compression") - end - - # Compress data - odatalen = outbuflen - r_odatalen = Ref{Cuint}(odatalen) - ret = BZ2_bzBuffToBuffCompress( - outbuf, r_odatalen, unsafe_load(buf), nbytes, blockSize100k, 0, 0 - ) - outdatalen = r_odatalen[] - if ret != CodecBzip2.BZ_OK - error("H5Zbzip2: bzip2 compression failed with error $ret.") - end - end # if flags & API.H5Z_FLAG_REVERSE != 0 - Libc.free(unsafe_load(buf)) - unsafe_store!(buf, outbuf) - unsafe_store!(buf_size, outbuflen) - - catch err - # "In the case of failure, the return value is 0 (zero) and all pointer arguments are left unchanged." - outdatalen = Csize_t(0) - if outbuf != C_NULL - Libc.free(outbuf) - end - @error "H5Zbzip2.jl Non-Fatal ERROR: " err - display(stacktrace(catch_backtrace())) - end # try - catch - - return Csize_t(outdatalen) -end # function H5Z_filter_bzip2 - -# Need stdcall for 32-bit Windows? 
-function BZ2_bzBuffToBuffCompress( - dest, destLen, source, sourceLen, blockSize100k, verbosity, workFactor -) - @static if CodecBzip2.WIN32 - return ccall( - ("BZ2_bzBuffToBuffCompress@28", libbzip2), - stdcall, - Cint, - (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint, Cint), - dest, - destLen, - source, - sourceLen, - blockSize100k, - verbosity, - workFactor - ) - else - return ccall( - (:BZ2_bzBuffToBuffCompress, libbzip2), - Cint, - (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint, Cint), - dest, - destLen, - source, - sourceLen, - blockSize100k, - verbosity, - workFactor - ) - end -end - -function BZ2_bzBuffToBuffDecompress(dest, destLen, source, sourceLen, small, verbosity) - @static if CodecBzip2.WIN32 - return ccall( - ("BZ2_bzBuffToBuffDecompress@24", libbzip2), - stdcall, - Cint, - (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint), - dest, - destLen, - source, - sourceLen, - small, - verbosity - ) - else - return ccall( - (:BZ2_bzBuffToBuffDecompress, libbzip2), - Cint, - (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint), - dest, - destLen, - source, - sourceLen, - small, - verbosity - ) - end -end - -# Filters Module - -""" - Bzip2Filter(blockSize100k) - -Apply Bzip2 compression. The filter id is $H5Z_FILTER_BZIP2. 
- -# External Links -* [BZIP2 HDF5 Filter ID 307](https://portal.hdfgroup.org/display/support/Filters#Filters-307) -* [PyTables Repository (C code)](https://github.com/PyTables/PyTables) -""" -struct Bzip2Filter <: Filter - blockSize100k::Cuint -end -Bzip2Filter() = Bzip2Filter(9) - -filterid(::Type{Bzip2Filter}) = H5Z_FILTER_BZIP2 -filtername(::Type{Bzip2Filter}) = bzip2_name -filter_func(::Type{Bzip2Filter}) = H5Z_filter_bzip2 -filter_cfunc(::Type{Bzip2Filter}) = @cfunction( - H5Z_filter_bzip2, - Csize_t, - (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}}) -) - -function __init__() - register_filter(Bzip2Filter) -end +const CodecBzip2Ext = Base.get_extension(HDF5, :CodecBzip2Ext) +const H5Z_FILTER_BZIP2 = CodecBzip2Ext.H5Z_FILTER_BZIP2 +const H5Z_filter_bzip2 = CodecBzip2Ext.H5Z_filter_bzip2 +const Bzip2Filter = CodecBzip2Ext.Bzip2Filter +const bzip2_name = CodecBzip2Ext.bzip2_name + end # module H5Zbzip2 diff --git a/filters/H5Zlz4/src/H5Zlz4.jl b/filters/H5Zlz4/src/H5Zlz4.jl index 39de93945..b7060e1f4 100644 --- a/filters/H5Zlz4/src/H5Zlz4.jl +++ b/filters/H5Zlz4/src/H5Zlz4.jl @@ -1,231 +1,26 @@ #= -This is a port of H5Zlz4.c to Julia -https://github.com/HDFGroup/hdf5_plugins/blob/master/LZ4/src/H5Zlz4.c -https://github.com/nexusformat/HDF5-External-Filter-Plugins/blob/master/LZ4/src/H5Zlz4.c -https://github.com/silx-kit/hdf5plugin/blob/main/src/LZ4/H5Zlz4.c - -H5Zlz4 is originally a copyright of HDF Group. License: licenses/H5Zlz4_LICENSE.txt - -The following license applies to the Julia port. Copyright (c) 2021 Mark Kittisopikul and Howard Hughes Medical Institute. 
License MIT, see LICENSE.txt =# module H5Zlz4 using CodecLz4 +using HDF5: HDF5 using HDF5.API import HDF5.Filters: Filter, filterid, register_filter, filtername, filter_func, filter_cfunc export H5Z_FILTER_LZ4, H5Z_filter_lz4, Lz4Filter -const H5Z_FILTER_LZ4 = API.H5Z_filter_t(32004) - -const DEFAULT_BLOCK_SIZE = 1 << 30 -const lz4_name = "HDF5 lz4 filter; see http://www.hdfgroup.org/services/contributions.html" - -const LZ4_AGGRESSION = Ref(1) - -# flags H5Z_FLAG_REVERSE or H5Z_FLAG_OPTIONAL -# cd_nelmts number of elements in cd_values (0 or 1) -# cd_values the first optional element must be the blockSize -# nbytes - number of valid bytes of data -# buf_size - total size of buffer -# buf - pointer to pointer of data -function H5Z_filter_lz4( - flags::Cuint, - cd_nelmts::Csize_t, - cd_values::Ptr{Cuint}, - nbytes::Csize_t, - buf_size::Ptr{Csize_t}, - buf::Ptr{Ptr{Cvoid}} -)::Csize_t - outBuf = C_NULL - ret_value = Csize_t(0) - - try - if (flags & API.H5Z_FLAG_REVERSE) != 0 # reverse filter, decompressing - #i32Buf = Ref{UInt32}() - blockSize = UInt32(0) - roBuf = Ref{UInt8}() - rpos = Ptr{UInt8}(unsafe_load(buf)) - #i64Buf = Ptr{UInt64}(rpos) - # Load the first 8 bytes from buffer as a big endian UInt64 - # This is the original size of the buffer - origSize = ntoh(unsafe_load(Ptr{UInt64}(rpos))) - rpos += 8 # advance the pointer - - # Next read the next four bytes from the buffer as a big endian UInt32 - # This is the blocksize - #i32Buf[] = rpos - blockSize = ntoh(unsafe_load(Ptr{UInt32}(rpos))) - rpos += 4 - if blockSize > origSize - blockSize = origSize - end - - # malloc a byte buffer of origSize - # outBuf = Vector{UInt8}(undef, origSize) - @debug "OrigSize" origSize - outBuf = Libc.malloc(origSize) - # Julia should throw an error if it cannot allocate this - roBuf = Ptr{UInt8}(outBuf) - decompSize = 0 - # Start with the first blockSize - while decompSize < origSize - # compressedBlockSize = UInt32(0) - if origSize - decompSize < blockSize # the last block can 
be smaller than block size - blockSize = origSize - decompSize - end - - #i32Buf[] = rpos - compressedBlockSize = ntoh(unsafe_load(Ptr{UInt32}(rpos))) - rpos += 4 - - if compressedBlockSize == blockSize - # There was no compression - # memcpy(roBuf, rpos, blockSize) - unsafe_copyto!(roBuf, rpos, blockSize) - decompressedBytes = blockSize - else - # do the compression - # LZ4_decompress_fast, version number 10300 ? - @debug "decompress_safe" rpos roBuf compressedBlockSize ( - origSize - decompSize - ) - decompressedBytes = CodecLz4.LZ4_decompress_safe( - rpos, roBuf, compressedBlockSize, origSize - decompSize - ) - @debug "decompressedBytes" decompressedBytes - end - - rpos += compressedBlockSize - roBuf += blockSize - decompSize += decompressedBytes - end - Libc.free(unsafe_load(buf)) - unsafe_store!(buf, outBuf) - outBuf = C_NULL - ret_value = Csize_t(origSize) - else - # forward filter - # compressing - #i64Buf = Ref{UInt64}() - #i32Buf = Ref{UInt32}() - - if nbytes > typemax(Int32) - error("Can only compress chunks up to 2GB") - end - blockSize = unsafe_load(cd_values) - if cd_nelmts > 0 && blockSize > 0 - else - blockSize = DEFAULT_BLOCK_SIZE - end - if blockSize > nbytes - blockSize = nbytes - end - nBlocks = (nbytes - 1) ÷ blockSize + 1 - maxDestSize = - nBlocks * CodecLz4.LZ4_compressBound(blockSize) + 4 + 8 + nBlocks * 4 - outBuf = Libc.malloc(maxDestSize) - - rpos = Ptr{UInt8}(unsafe_load(buf)) - roBuf = Ptr{UInt8}(outBuf) - - # Header - unsafe_store!(Ptr{UInt64}(roBuf), hton(UInt64(nbytes))) - roBuf += 8 - - unsafe_store!(Ptr{UInt32}(roBuf), hton(UInt32(blockSize))) - roBuf += 4 +const CodecLz4Ext = Base.get_extension(HDF5, :CodecLz4Ext) - outSize = 12 +const H5Z_filter_lz4 = CodecLz4Ext.H5Z_filter_lz4 +const Lz4Filter = CodecLz4Ext.Lz4Filter - for block in 0:(nBlocks - 1) - # compBlockSize::UInt32 - origWritten = Csize_t(block * blockSize) - if nbytes - origWritten < blockSize # the last block may be < blockSize - blockSize = nbytes - origWritten - end 
+const H5Z_FILTER_LZ4 = CodecLz4Ext.H5Z_FILTER_LZ4 - # aggression = 1 is the same LZ4_compress_default - @debug "LZ4_compress_fast args" rpos outBuf roBuf roBuf + 4 blockSize nBlocks CodecLz4.LZ4_compressBound( - blockSize - ) - compBlockSize = UInt32( - CodecLz4.LZ4_compress_fast( - rpos, - roBuf + 4, - blockSize, - CodecLz4.LZ4_compressBound(blockSize), - LZ4_AGGRESSION[] - ) - ) - @debug "Compressed block size" compBlockSize +const DEFAULT_BLOCK_SIZE = CodecLz4Ext.DEFAULT_BLOCK_SIZE +const lz4_name = CodecLz4Ext.lz4_name - if compBlockSize == 0 - error("Could not compress block $block") - end - - if compBlockSize >= blockSize # compression did not save any space, do a memcpy instead - compBlockSize = blockSize - unsafe_copyto!(roBuf + 4, rpos, blockSize) - end - - unsafe_store!(Ptr{UInt32}(roBuf), hton(UInt32(compBlockSize))) # write blocksize - roBuf += 4 - - rpos += blockSize - roBuf += compBlockSize - outSize += compBlockSize + 4 - end - - Libc.free(unsafe_load(buf)) - unsafe_store!(buf, outBuf) - unsafe_store!(buf_size, outSize) - outBuf = C_NULL - ret_value = Csize_t(outSize) - end # (flags & API.H5Z_FLAG_REVERSE) != 0 - - catch err - # "In the case of failure, the return value is 0 (zero) and all pointer arguments are left unchanged." - ret_value = Csize_t(0) - @error "H5Zlz4.jl Non-Fatal ERROR: " err - display(stacktrace(catch_backtrace())) - finally - if outBuf != C_NULL - Libc.free(outBuf) - end - end - return Csize_t(ret_value) -end - -# Filters Module - -""" - Lz4Filter(blockSize) - -Apply LZ4 compression. `blockSize` is the main argument. The filter id is $H5Z_FILTER_LZ4. 
- -# External Links -* [LZ4 HDF5 Filter ID 32004](https://portal.hdfgroup.org/display/support/Filters#Filters-32004) -* [LZ4 HDF5 Plugin Repository (C code)](https://github.com/nexusformat/HDF5-External-Filter-Plugins/tree/master/LZ4) -""" -struct Lz4Filter <: Filter - blockSize::Cuint -end -Lz4Filter() = Lz4Filter(DEFAULT_BLOCK_SIZE) - -filterid(::Type{Lz4Filter}) = H5Z_FILTER_LZ4 -filtername(::Type{Lz4Filter}) = lz4_name -filter_func(::Type{Lz4Filter}) = H5Z_filter_lz4 -filter_cfunc(::Type{Lz4Filter}) = @cfunction( - H5Z_filter_lz4, - Csize_t, - (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}}) -) - -function __init__() - register_filter(Lz4Filter) -end +const LZ4_AGGRESSION = CodecLz4Ext.LZ4_AGGRESSION end diff --git a/filters/H5Zzstd/Project.toml b/filters/H5Zzstd/Project.toml index 2f4c1256c..a9b09398b 100644 --- a/filters/H5Zzstd/Project.toml +++ b/filters/H5Zzstd/Project.toml @@ -1,6 +1,6 @@ name = "H5Zzstd" uuid = "f6f2d980-1ec6-471c-a70d-0270e22f1103" -version = "0.1.2" +version = "0.2.0" [deps] CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2" diff --git a/filters/H5Zzstd/README.md b/filters/H5Zzstd/README.md index 1c6dff122..b6b4ff0f9 100644 --- a/filters/H5Zzstd/README.md +++ b/filters/H5Zzstd/README.md @@ -3,4 +3,11 @@ Implements the Zstd filter for [HDF5.jl](https://github.com/JuliaIO/HDF5.jl) in Julia. See the [documentation](https://juliaio.github.io/HDF5.jl/stable/filters/#H5Zzstd.jl) -This implements [HDF5 ZStandard Filter 32015](https://portal.hdfgroup.org/display/support/Filters#Filters-32015) \ No newline at end of file +This implements [HDF5 ZStandard Filter 32015](https://portal.hdfgroup.org/display/support/Filters#Filters-32015) + +This is a transitional package as the contents of this package are now +implemented by `CodecZstdExt`, an extension package to HDF5 that loads +when CodecZstd.jl is loaded. + +Loading this package will trigger loading of the extension since this +package loads both HDF5.jl and CodecZstd.jl. 
diff --git a/filters/H5Zzstd/src/H5Zzstd.jl b/filters/H5Zzstd/src/H5Zzstd.jl
index e0ea75499..94e2335b9 100644
--- a/filters/H5Zzstd/src/H5Zzstd.jl
+++ b/filters/H5Zzstd/src/H5Zzstd.jl
@@ -1,128 +1,26 @@
-#=
-Derived from https://github.com/aparamon/HDF5Plugin-Zstandard, zstd_h5plugin.c
-Licensed under Apache License Version 2.0, see licenses/H5Zzstd_LICENSE.txt
-
-The following license applies to the Julia port.
-Copyright (c) 2021 Mark Kittisopikul and Howard Hughes Medical Institute. License MIT, see LICENSE.txt
-=#
-module H5Zzstd
-
-using CodecZstd
-import CodecZstd.LibZstd
-using HDF5.API
-import HDF5.Filters:
-    Filter, filterid, register_filter, filterid, filtername, filter_func, filter_cfunc
-
-const H5Z_FILTER_ZSTD = API.H5Z_filter_t(32015)
-const zstd_name = "Zstandard compression: http://www.zstd.net"
-
-export H5Z_filter_zstd, H5Z_FILTER_ZSTD, ZstdFilter
-
-# cd_values First optional value is the compressor aggression
-# Default is CodecZstd.LibZstd.ZSTD_CLEVEL_DEFAULT
-function H5Z_filter_zstd(
-    flags::Cuint,
-    cd_nelmts::Csize_t,
-    cd_values::Ptr{Cuint},
-    nbytes::Csize_t,
-    buf_size::Ptr{Csize_t},
-    buf::Ptr{Ptr{Cvoid}}
-)::Csize_t
-    inbuf = unsafe_load(buf)
-    outbuf = C_NULL
-    origSize = nbytes
-    ret_value = Csize_t(0)
-
-    try
-        if flags & API.H5Z_FLAG_REVERSE != 0
-            #decompresssion
-
-            decompSize = LibZstd.ZSTD_getDecompressedSize(inbuf, origSize)
-            if decompSize == 0
-                error("zstd_h5plugin: Cannot retrieve decompressed chunk size")
-            end
-            outbuf = Libc.malloc(decompSize)
-            if outbuf == C_NULL
-                error(
-                    "zstd_h5plugin: Cannot allocate memory for outbuf during decompression."
-                )
-            end
-            decompSize = LibZstd.ZSTD_decompress(outbuf, decompSize, inbuf, origSize)
-            Libc.free(inbuf)
-            unsafe_store!(buf, outbuf)
-            outbuf = C_NULL
-            ret_value = Csize_t(decompSize)
-        else
-            # compression
-
-            if cd_nelmts > 0
-                aggression = Cint(unsafe_load(cd_values))
-            else
-                aggression = CodecZstd.LibZstd.ZSTD_CLEVEL_DEFAULT
-            end
-
-            if aggression < 1
-                aggression = 1 # ZSTD_minCLevel()
-            elseif aggression > LibZstd.ZSTD_maxCLevel()
-                aggression = LibZstd.ZSTD_maxCLevel()
-            end
-
-            compSize = LibZstd.ZSTD_compressBound(origSize)
-            outbuf = Libc.malloc(compSize)
-            if outbuf == C_NULL
-                error(
-                    "zstd_h5plugin: Cannot allocate memory for outbuf during compression."
-                )
-            end
-
-            compSize = LibZstd.ZSTD_compress(outbuf, compSize, inbuf, origSize, aggression)
-
-            Libc.free(unsafe_load(buf))
-            unsafe_store!(buf, outbuf)
-            unsafe_store!(buf_size, compSize)
-            outbuf = C_NULL
-            ret_value = compSize
-        end
-    catch e
-        # "In the case of failure, the return value is 0 (zero) and all pointer arguments are left unchanged."
-        ret_value = Csize_t(0)
-        # Output Julia error via async so we do not task switch during callback
-        @async @error "H5Zzstd Non-Fatal ERROR: " exception = (e, catch_backtrace())
-    finally
-        if outbuf != C_NULL
-            Libc.free(outbuf)
-        end
-    end # try catch finally
-    return Csize_t(ret_value)
-end
+"""
+    H5Zzstd
 
-# Filters Module
+Transitional package to HDF5/CodecZstdExt.
+The contents of this package are now contained within the package extension
+CodecZstdExt. Loading this package will load the package extension.
 
 """
-    ZstdFilter(clevel)
+module H5Zzstd
 
-Zstandard compression filter. `clevel` determines the compression level.
+using HDF5: HDF5
+using CodecZstd: CodecZstd
+# With both HDF5 and CodecZstd loaded, HDF5's CodecZstdExt extension is expected to
+# be available; `Base.get_extension` returns `nothing` when it is not loaded.
+const CodecZstdExt = Base.get_extension(HDF5, :CodecZstdExt)
 
-# External Links
-* [Zstandard HDF5 Filter ID 32015](https://portal.hdfgroup.org/display/support/Filters#Filters-32015)
-* [Zstandard HDF5 Plugin Repository (C code)](https://github.com/aparamon/HDF5Plugin-Zstandard)
-"""
-struct ZstdFilter <: Filter
-    clevel::Cuint
-end
-ZstdFilter() = ZstdFilter(CodecZstd.LibZstd.ZSTD_CLEVEL_DEFAULT)
+# Re-bind the extension's definitions under this package's historical names.
+const H5Z_FILTER_ZSTD = CodecZstdExt.H5Z_FILTER_ZSTD
+const zstd_name = CodecZstdExt.zstd_name
 
-filterid(::Type{ZstdFilter}) = H5Z_FILTER_ZSTD
-filtername(::Type{ZstdFilter}) = zstd_name
-filter_func(::Type{ZstdFilter}) = H5Z_filter_zstd
-filter_cfunc(::Type{ZstdFilter}) = @cfunction(
-    H5Z_filter_zstd,
-    Csize_t,
-    (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}})
-)
+const H5Z_filter_zstd = CodecZstdExt.H5Z_filter_zstd
+const ZstdFilter = CodecZstdExt.ZstdFilter
 
-function __init__()
-    register_filter(ZstdFilter)
-end
+export H5Z_filter_zstd, H5Z_FILTER_ZSTD, ZstdFilter
 
 end # module H5Zzstd