Skip to content

Commit

Permalink
Add virtual dataset support (#1012)
Browse files Browse the repository at this point in the history
* add virtual dataset support
* clean up, add some docs
  • Loading branch information
simonbyrne committed Oct 17, 2022
1 parent 76ca279 commit 88da0ed
Show file tree
Hide file tree
Showing 7 changed files with 142 additions and 3 deletions.
8 changes: 8 additions & 0 deletions docs/src/interface/properties.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ StringCreateProperties
DatatypeCreateProperties
```

## Virtual Datasets

```@docs
VirtualMapping
VirtualLayout
```

## Drivers

```@meta
Expand All @@ -46,3 +53,4 @@ Core
POSIX
MPIO
```

1 change: 1 addition & 0 deletions src/HDF5.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ include("groups.jl")
include("datatypes.jl")
include("typeconversions.jl")
include("dataspaces.jl")
include("virtual.jl")
include("datasets.jl")
include("attributes.jl")
include("readwrite.jl")
Expand Down
19 changes: 19 additions & 0 deletions src/api/helpers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,25 @@ function h5p_get_userblock(plist_id)
return len[]
end

# Convenience method: allocate the `Csize_t` output reference, let the
# two-argument `h5p_get_virtual_count` fill it in, and return the stored value.
function h5p_get_virtual_count(dcpl_id)
    nmappings = Ref{Csize_t}()
    h5p_get_virtual_count(dcpl_id, nmappings)
    return nmappings[]
end

# Convenience method returning the dataset name at `index` of `dcpl_id` as a
# `String`, using the usual two-call pattern of the four-argument method.
function h5p_get_virtual_dsetname(dcpl_id, index)
    # Called with a NULL buffer of size 0, the return value is used as the
    # name length.
    namelen = h5p_get_virtual_dsetname(dcpl_id, index, C_NULL, 0)
    namebuf = StringVector(namelen)
    # The size passed is one more than the buffer length (presumably to leave
    # room for the terminating NUL -- confirm against the C API).
    h5p_get_virtual_dsetname(dcpl_id, index, namebuf, namelen + 1)
    return String(namebuf)
end
# Convenience method returning the file name at `index` of `dcpl_id` as a
# `String`, using the usual two-call pattern of the four-argument method.
function h5p_get_virtual_filename(dcpl_id, index)
    # Called with a NULL buffer of size 0, the return value is used as the
    # name length.
    namelen = h5p_get_virtual_filename(dcpl_id, index, C_NULL, 0)
    namebuf = StringVector(namelen)
    # The size passed is one more than the buffer length (presumably to leave
    # room for the terminating NUL -- confirm against the C API).
    h5p_get_virtual_filename(dcpl_id, index, namebuf, namelen + 1)
    return String(namebuf)
end

function h5p_get_virtual_prefix(dapl_id)
virtual_file_len = h5p_get_virtual_prefix(dapl_id, C_NULL, 0)
buffer = StringVector(virtual_file_len)
Expand Down
1 change: 1 addition & 0 deletions src/api/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ _has_symbol(sym::Symbol) = Libdl.dlsym(libhdf5handle[], sym; throw_error=false)
# Integer codes for the dataset storage layouts.
# NOTE(review): presumably these mirror the C enum `H5D_layout_t` -- confirm.
const H5D_COMPACT = 0
const H5D_CONTIGUOUS = 1
const H5D_CHUNKED = 2
const H5D_VIRTUAL = 3

# allocation times (C enum H5D_alloc_time_t)
const H5D_ALLOC_TIME_ERROR = -1
Expand Down
17 changes: 14 additions & 3 deletions src/properties.jl
Original file line number Diff line number Diff line change
Expand Up @@ -431,15 +431,20 @@ Properties used when creating a new `Dataset`. Inherits from
- `:chunked`: Store raw data separately from the object header as chunks of
data in separate locations in the file.
- `:virtual`: Draw raw data from multiple datasets in different files.
- `:virtual`: Draw raw data from multiple datasets in different files. See
the `virtual` property below.
See $(h5doc("H5P_SET_LAYOUT")).
- `no_attrs_hint`: Minimize the space for dataset metadata by hinting that no
attributes will be added if set to `true`. Attributes can still be added but
may exist elsewhere within the file.
See $(h5doc("H5P_SET_DSET_NO_ATTRS_HINT")).
may exist elsewhere within the file. See
$(h5doc("H5P_SET_DSET_NO_ATTRS_HINT")).
- `virtual`: when specified, creates a virtual dataset (VDS). The argument
should be a collection of [`VirtualMapping`](@ref) objects for
describing the mapping from the dataset to the source datasets. When accessed,
returns a [`VirtualLayout`](@ref) object.
The following options are shortcuts for the various filters, and are set-only.
They will be appended to the filter pipeline in the order in which they appear
Expand Down Expand Up @@ -498,6 +503,9 @@ set_shuffle!(p::Properties, val::Bool) = val && push!(Filters.FilterPipeline(p),
# Append the Fletcher32 filter to the filter pipeline of `p` when `val` is
# `true`; when `val` is `false` nothing is appended and `false` is returned.
function set_fletcher32!(p::Properties, val::Bool)
    val || return false
    return push!(Filters.FilterPipeline(p), Filters.Fletcher32())
end

# Setting `blosc` always raises an error pointing the user at the H5Zblosc package.
function set_blosc!(p::Properties, val)
    return error("The Blosc filter now requires the H5Zblosc package be loaded")
end

# Getter for the `virtual` property: wrap `p` in a `VirtualLayout`.
function get_virtual(p::Properties)
    return VirtualLayout(p)
end

# Setter for the `virtual` property: append every mapping in `vmaps` to the
# `VirtualLayout` of `p`, returning that layout.
function set_virtual!(p::Properties, vmaps)
    layout = VirtualLayout(p)
    return append!(layout, vmaps)
end


class_propertynames(::Type{DatasetCreateProperties}) = (
:alloc_time,
Expand All @@ -508,6 +516,7 @@ class_propertynames(::Type{DatasetCreateProperties}) = (
:filters,
:layout,
:no_attrs_hint,
:virtual,
# convenience
:blosc,
:deflate,
Expand All @@ -532,6 +541,7 @@ function class_getproperty(::Type{DatasetCreateProperties}, p::Properties, name:
false :
API.h5p_get_dset_no_attrs_hint(p)
) :
name === :virtual ? get_virtual(p) :
# deprecated
name === :filter ? (depwarn("`filter` property name is deprecated, use `filters` instead",:class_getproperty); get_filters(p)) :
class_getproperty(superclass(DatasetCreateProperties), p, name)
Expand All @@ -549,6 +559,7 @@ function class_setproperty!(::Type{DatasetCreateProperties}, p::Properties, name
error("no_attrs_hint is only valid for HDF5 library versions 1.10.5 or greater") :
API.h5p_set_dset_no_attrs_hint(p, val)
) :
name === :virtual ? set_virtual!(p, val) :
# set-only for convenience
name === :blosc ? set_blosc!(p, val) :
name === :deflate ? set_deflate!(p, val) :
Expand Down
61 changes: 61 additions & 0 deletions src/virtual.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""
    VirtualMapping(
        vspace::Dataspace,
        srcfile::AbstractString,
        srcdset::AbstractString,
        srcspace::Dataspace
    )

Describe how elements of a virtual dataset (VDS) map onto a source dataset.

`vspace` selects the elements of the virtual dataset, and `srcspace` selects the
corresponding elements of the source dataset. The source dataset is located by
the name of the file containing it, `srcfile`, and its name within that file,
`srcdset`.

Both `srcfile` and `srcdset` accept "printf"-style formats, in which `%b` is
replaced by the block count of the selection.

For more details on how source file resolution works, see
[`H5P_SET_VIRTUAL`](https://portal.hdfgroup.org/display/HDF5/H5P_SET_VIRTUAL).
"""
struct VirtualMapping
    vspace::Dataspace
    srcfile::String
    srcdset::String
    srcspace::Dataspace
end

"""
    VirtualLayout(dcpl::DatasetCreateProperties)

A vector-like view of the [`VirtualMapping`](@ref)s stored in the dataset
creation property list `dcpl`.

It is an `AbstractVector{VirtualMapping}` that supports `length`, `getindex`,
`push!` and `append!`.
"""
struct VirtualLayout <: AbstractVector{VirtualMapping}
    dcpl::DatasetCreateProperties
end

# Number of mappings stored in the property list.
#
# The low-level count is a `Csize_t`; it is converted to `Int` so that `size`
# and `axes` are signed like those of other arrays, and so that arithmetic such
# as `length(vlayout) - 1` on an empty layout does not wrap around.
function Base.length(vlayout::VirtualLayout)
    return Int(API.h5p_get_virtual_count(vlayout.dcpl))
end

# One-dimensional size, as required for an `AbstractVector`.
Base.size(vlayout::VirtualLayout) = (length(vlayout),)

# Register a single mapping on the underlying property list and return the
# layout itself, so that calls can be chained like any other `push!`.
function Base.push!(vlayout::VirtualLayout, vmap::VirtualMapping)
    dcpl = vlayout.dcpl
    API.h5p_set_virtual(dcpl, vmap.vspace, vmap.srcfile, vmap.srcdset, vmap.srcspace)
    return vlayout
end
# Push every element of the iterable `vmaps` onto the layout, in iteration
# order, and return the layout.
function Base.append!(vlayout::VirtualLayout, vmaps)
    foreach(vmap -> push!(vlayout, vmap), vmaps)
    return vlayout
end

# Reconstruct the `i`-th mapping (1-based) from the property list.
#
# Raises a `BoundsError` when `i` is outside `1:length(vlayout)`, rather than
# forwarding an invalid index (e.g. `-1` for `i == 0`) to the low-level API.
function Base.getindex(vlayout::VirtualLayout, i::Integer)
    @boundscheck checkbounds(vlayout, i)
    dcpl = vlayout.dcpl
    # The low-level getters are called with a zero-based index.
    idx = i - 1
    vspace = Dataspace(API.h5p_get_virtual_vspace(dcpl, idx))
    srcfile = API.h5p_get_virtual_filename(dcpl, idx)
    srcdset = API.h5p_get_virtual_dsetname(dcpl, idx)
    srcspace = Dataspace(API.h5p_get_virtual_srcspace(dcpl, idx))
    return VirtualMapping(vspace, srcfile, srcdset, srcspace)
end
38 changes: 38 additions & 0 deletions test/virtual_dataset.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
using Test, HDF5

@testset "virtual dataset" begin
    # Named `tmpdir` rather than `dirname` so that `Base.dirname` is not shadowed.
    tmpdir = mktempdir()

    # Two source files, each holding a 3-element dataset "x" with a distinct
    # fill value so that the columns of the virtual dataset can be told apart.
    h5open(joinpath(tmpdir, "sub-0.hdf5"), "w") do f0
        f0["x"] = fill(1.0, 3)
    end
    h5open(joinpath(tmpdir, "sub-1.hdf5"), "w") do f1
        f1["x"] = fill(2.0, 3)
    end

    srcspace = dataspace((3,))
    vspace = dataspace((3, 2); max_dims=(3, -1))
    HDF5.select_hyperslab!(vspace, (1:3, HDF5.BlockRange(1; count=-1)))

    f = h5open(joinpath(tmpdir, "main.hdf5"), "w")
    try
        # `%b` is replaced by the block count of the selection, which yields
        # the names "sub-0.hdf5" and "sub-1.hdf5" created above.
        d = create_dataset(
            f,
            "x",
            datatype(Float64),
            vspace;
            virtual=[HDF5.VirtualMapping(vspace, "./sub-%b.hdf5", "x", srcspace)]
        )

        @test size(d) == (3, 2)
        @test read(d) == hcat(fill(1.0, 3), fill(2.0, 3))

        dcpl = HDF5.get_create_properties(d)

        @test dcpl.virtual isa HDF5.VirtualLayout
        @test length(dcpl.virtual) == 1
        @test dcpl.virtual[1] isa HDF5.VirtualMapping
    finally
        # Make sure the main file handle is released even if something above throws.
        close(f)
    end
end

0 comments on commit 88da0ed

Please sign in to comment.