Skip to content

Commit

Permalink
Implement getindex and setindex! for Datasets.
Browse files Browse the repository at this point in the history
  • Loading branch information
JonasIsensee committed Aug 31, 2024
1 parent 8b6dbbc commit 67b38b7
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 45 deletions.
3 changes: 3 additions & 0 deletions src/data/reconstructing_datatypes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ end
# jltype is the inverse of h5type, providing a ReadRepresentation for an
# H5Datatype. We handle shared datatypes here: ones that were not "committed" by JLD2.
function jltype(f::JLDFile, sdt::SharedDatatype)
if !(f.plain) && haskey(f.datatype_locations, sdt.header_offset)
return jltype(f, f.datatype_locations[sdt.header_offset])
end
haskey(f.h5jltype, sdt) && return f.h5jltype[sdt]::ReadRepresentation
dt, attrs = read_shared_datatype(f, sdt)
rr = jltype(f, dt)
Expand Down
42 changes: 9 additions & 33 deletions src/datasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -66,48 +66,24 @@ Otherwise, `datatype_offset` points to the offset of the datatype attribute.
filters::FilterPipeline=FilterPipeline(),
header_offset::RelOffset=NULL_REFERENCE,
attributes::Union{Vector{ReadAttribute},Nothing}=nothing)
# See if there is a julia type attribute
io = f.io
if dt isa SharedDatatype
# this means that it is "committed" to `_types` if the file was written by JLD2
rr = jltype(f, get(f.datatype_locations, dt.header_offset, dt))

if layout.data_offset == -1
# There was no layout message.
# That means, this dataset is just a datatype
# return the Datatype
return typeof(rr).parameters[1]
end

seek(io, layout.data_offset)
read_dataspace = (dataspace, header_offset, layout, filters)
read_data(f, rr, read_dataspace, attributes)

rr = jltype(f, dt)
if layout.data_offset == -1
# There was no layout message.
# That means, this dataset is just a datatype
return typeof(rr).parameters[1]
elseif layout.data_offset == typemax(Int64)
rr = jltype(f, dt)
T,S = typeof(rr).parameters
T,_ = typeof(rr).parameters
if layout.data_length > -1
# TODO: this could use the fill value message to populate the array
@warn "This array should be populated by a fill value. This is not (yet) implemented."
end
v = Array{T, 1}()
track_weakref!(f, header_offset, v)
return v
else
dtt = dt
rr = jltype(f, dtt)

if layout.data_offset == -1
# There was no layout message.
# That means, this dataset is just a datatype
# return the Datatype
return typeof(rr).parameters[1]
end

seek(io, layout.data_offset)
read_dataspace = (dataspace, header_offset, layout, filters)
read_data(f, rr, read_dataspace, attributes)
end
seek(f.io, layout.data_offset)
read_dataspace = (dataspace, header_offset, layout, filters)
read_data(f, rr, read_dataspace, attributes)
end

# Most types can only be scalars or arrays
Expand Down
63 changes: 51 additions & 12 deletions src/explicit_datasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ function read_dataset(dset::Dataset)
DataLayout(f, dset.layout),
isnothing(dset.filters) ? FilterPipeline() : dset.filters,
dset.offset,
collect(values(dset.attributes)))
collect(ReadAttribute, values(dset.attributes)))
end

"""
Expand Down Expand Up @@ -392,11 +392,7 @@ function ismmappable(dset::Dataset)
iswritten(dset) || return false
f = dset.parent.f
dt = dset.datatype
if dt isa SharedDatatype
rr = jltype(f, get(f.datatype_locations, dt.header_offset, dt))
else
rr = jltype(f, dt)
end
rr = jltype(f, dt)
T = typeof(rr).parameters[1]
!(samelayout(T)) && return false
!isempty(dset.filters.filters) && return false
Expand Down Expand Up @@ -424,11 +420,7 @@ function readmmap(dset::Dataset)

# figure out the element type
dt = dset.datatype
if dt isa SharedDatatype
rr = jltype(f, get(f.datatype_locations, dt.header_offset, dt))
else
rr = jltype(f, dt)
end
rr = jltype(f, dt)
T = typeof(rr).parameters[1]
ndims, offset = get_ndims_offset(f, ReadDataspace(f, dset.dataspace), collect(values(dset.attributes)))

Expand Down Expand Up @@ -518,4 +510,51 @@ function allocate_early(dset::Dataset, T::DataType)
end
return offset
end
end
end

"""
    ArrayDataset{T, N, ODR, IOT} <: AbstractArray{T, N}

Array interface to a `Dataset` whose elements are read from and written to the file
one at a time (see `getindex` / `setindex!` for `ArrayDataset`).

# Fields
- `f`: the `JLDFile` the dataset belongs to.
- `dset`: the wrapped `Dataset`.
- `dims`: array size, reported by `size`.
- `data_address`: file position used as the base for element offsets.
- `rr`: `ReadRepresentation` pairing the element type `T` with its stored form `ODR`.
"""
struct ArrayDataset{T, N, ODR, IOT} <: AbstractArray{T, N}
    f::JLDFile{IOT}
    dset::Dataset
    dims::NTuple{N, Int}
    data_address::Int64
    rr::ReadRepresentation{T, ODR}
end
"""
    ArrayDataset(dset::Dataset)

Wrap `dset` in an `ArrayDataset`.

Throws an `ArgumentError` when `isarraydataset(dset)` is false and an
`UnsupportedFeatureException` when the dataset's filters indicate compression.
"""
function ArrayDataset(dset::Dataset)
    if !isarraydataset(dset)
        throw(ArgumentError("Dataset is not an array"))
    end
    if iscompressed(dset.filters)
        throw(UnsupportedFeatureException("Compressed datasets are not supported."))
    end

    file = dset.parent.f
    # The stored dimension order is reversed to obtain the array size.
    dims = reverse(dset.dataspace.dimensions)
    address = fileoffset(file, dset.layout.data_address)
    rr = jltype(file, dset.datatype)
    return ArrayDataset(file, dset, dims, address, rr)
end

"""
    isarraydataset(dset::Dataset)

Return `true` when the dataspace of `dset` is an `HmWrap{HmDataspace}` whose
`dataspace_type` is `DS_SIMPLE` or `DS_V1`. Return `false` otherwise, including
when the dataset has no dataspace.
"""
function isarraydataset(dset::Dataset)
    space = dset.dataspace
    # `nothing` and any other dataspace representation are rejected here.
    space isa HmWrap{HmDataspace} || return false
    return space.dataspace_type in (DS_SIMPLE, DS_V1)
end

# `ArrayDataset` implements scalar access by linear index only; Base derives
# cartesian and range indexing from it.
function Base.IndexStyle(::Type{<:ArrayDataset})
    return IndexLinear()
end

# The array size is the dimension tuple stored at construction time.
function Base.size(A::ArrayDataset)
    return A.dims
end
# Indexing a `Dataset` with one or more indices goes through a temporary
# `ArrayDataset` wrapper.
function Base.getindex(dset::Dataset, I...)
    return Base.getindex(ArrayDataset(dset), I...)
end

# `dset[]` (no indices) reads the dataset via `read_dataset`.
function Base.getindex(dset::Dataset)
    return read_dataset(dset)
end

# Assignment requires at least one index and is forwarded to an `ArrayDataset`.
function Base.setindex!(dset::Dataset, v, i, I...)
    return Base.setindex!(ArrayDataset(dset), v, i, I...)
end

"""
    getindex(A::ArrayDataset, i::Int)

Read the element at linear index `i` from the file backing `A`.

The file is positioned at `A.data_address` plus `(i - 1)` element sizes
(`odr_sizeof(A.rr)`) and a single scalar is read with `A.rr`.
"""
function Base.getindex(A::ArrayDataset, i::Int)
    @boundscheck checkbounds(A, i)
    file = A.f
    elem_offset = A.data_address + (i - 1) * odr_sizeof(A.rr)
    seek(file.io, elem_offset)
    return read_scalar(file, A.rr, UNDEFINED_ADDRESS)
end

"""
    setindex!(A::ArrayDataset, v, i::Int)

Overwrite the element at linear index `i` in the file backing `A` with `v`
and return `v`.

Throws an `ArgumentError` when the file is not writable. The bounds check runs
before the writability check.
"""
function Base.setindex!(A::ArrayDataset{T,N,ODR}, v, i::Int) where {T,N,ODR}
    @boundscheck checkbounds(A, i)
    file = A.f
    if !file.writable
        throw(ArgumentError("Cannot edit in read-only mode"))
    end
    # Position the stream at the slot of element `i` before writing.
    elem_offset = A.data_address + (i - 1) * odr_sizeof(A.rr)
    seek(file.io, elem_offset)
    write_data(file.io, file, v, T, datamode(ODR), JLDWriteSession())
    return v
end
32 changes: 32 additions & 0 deletions test/dataset_api.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,36 @@ using JLD2, Test
end
@test load(fn)["d"] == zeros(1000,1000)
end
end

@testset "Slicing & Updating" begin
    cd(mktempdir()) do
        fn = "test.jld2"
        jldsave(fn; a=42, b=[42 43 44; 45 46 47], c=[(0x00, 1f0), (0x42, 2f0)])

        # Default open mode: reading and slicing work, assignment must be rejected.
        jldopen(fn) do f
            scalar = JLD2.get_dataset(f, "a")
            @test scalar[] == 42

            matrix = JLD2.get_dataset(f, "b")
            @test matrix[] == [42 43 44; 45 46 47]
            @test matrix[1] == 42
            @test matrix[1,1] == 42
            @test matrix[1:2, 1:2] == [42 43; 45 46]
            @test matrix[1,1:2:3] == [42, 44]
            @test_throws BoundsError matrix[7]
            @test_throws BoundsError matrix[2,4]
            @test_throws ArgumentError matrix[1] = 1
        end

        # Append mode: element-wise updates are visible on subsequent reads.
        jldopen(fn, "a") do f
            matrix = JLD2.get_dataset(f, "b")
            matrix[2] = -1
            @test matrix[] == [42 43 44; -1 46 47]
            matrix[1,1:2:3] = [1,5]
            @test matrix[] == [1 43 5; -1 46 47]

            # Elements of a non-primitive (tuple) type can be replaced as well.
            tuples = JLD2.get_dataset(f, "c")
            tuples[2] = (0xff, 0f0)
            @test f["c"] == [(0x00, 1f0), (0xff, 0f0)]
        end
    end
end

0 comments on commit 67b38b7

Please sign in to comment.