Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: Slicing #589

Merged
merged 4 commits into from
Sep 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 18 additions & 37 deletions src/data/reconstructing_datatypes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,10 @@

# jltype is the inverse of h5type, providing a ReadRepresentation for an
# H5Datatype. We handle committed datatypes here, and other datatypes below.
function jltype(f::JLDFile, cdt::CommittedDatatype)
function jltype(f::JLDFile, sdt::Union{SharedDatatype,CommittedDatatype})
cdt = get(f.datatype_locations, sdt.header_offset, sdt)
haskey(f.h5jltype, cdt) && return f.h5jltype[cdt]::ReadRepresentation

dt, attrs = read_shared_datatype(f, cdt)

julia_type_attr = nothing
Expand All @@ -75,51 +77,40 @@
written_type_attr = attr
end
end
isnothing(julia_type_attr) && return f.h5jltype[cdt] = jltype(f, dt)

if isa(julia_type_attr, Nothing)
throw(InvalidDataException())
end
julia_type_attr = julia_type_attr::ReadAttribute

# If type of datatype is this datatype, then this is the committed
# datatype that describes a datatype
if julia_type_attr.datatype isa SharedDatatype &&
julia_type_attr.datatype.header_offset == cdt.header_offset
# Verify that the datatype matches our expectations
# Bootstrap: the datatype of datatype is a datatype
if julia_type_attr.datatype == SharedDatatype(cdt.header_offset)
if dt != H5TYPE_DATATYPE
error("""The HDF5 datatype representing a Julia datatype does not match
the expectations of this version of JLD.

You may need to update JLD to read this file.""")
throw(InternalError("""The HDF5 datatype representing a Julia datatype does not match

Check warning on line 85 in src/data/reconstructing_datatypes.jl

View check run for this annotation

Codecov / codecov/patch

src/data/reconstructing_datatypes.jl#L85

Added line #L85 was not covered by tests
the expectations of this version of JLD2.
You may need to update JLD2 to read this file."""))
end
f.jlh5type[DataType] = cdt
f.datatypes[cdt.index] = dt
return (f.h5jltype[cdt] = ReadRepresentation{DataType, DataTypeODR()}())
end

f.plain && return f.h5jltype[cdt] = jltype(f, dt)

datatype = read_attr_data(f, julia_type_attr)
if f.plain && !(datatype isa Upgrade) && !(datatype <: Tuple)
rr = jltype(f, dt)
return f.h5jltype[cdt] = rr
end

if written_type_attr !== nothing
if !isnothing(written_type_attr)
# Custom serialization
custom_datatype = read_attr_data(f, written_type_attr)
read_as = _readas(custom_datatype, datatype)
if read_as <: UnknownType
@warn("custom serialization of $(typestring(read_as))" *
" encountered, but the type does not exist in the workspace; the data will be read unconverted")
rr = (constructrr(f, custom_datatype, dt, attrs)::Tuple{ReadRepresentation,Bool})[1]
rr, _ = constructrr(f, custom_datatype, dt, attrs)
canonical = false
else
rr, canonical = constructrr(f, custom_datatype, dt, attrs)::Tuple{ReadRepresentation,Bool}
rrty = typeof(rr)
rr = ReadRepresentation{read_as, CustomSerialization{rrty.parameters[1], rrty.parameters[2]}}()
canonical = canonical && writeas(read_as) === custom_datatype
rr, canonical = constructrr(f, custom_datatype, dt, attrs)
rr = ReadRepresentation{read_as, CustomSerialization{typeof(rr).parameters...}}()
canonical &= writeas(read_as) === custom_datatype
end
else
rr, canonical = constructrr(f, datatype, dt, attrs)::Tuple{ReadRepresentation,Bool}
rr, canonical = constructrr(f, datatype, dt, attrs)
end

canonical && (f.jlh5type[datatype] = cdt)
Expand All @@ -128,16 +119,6 @@
end


# jltype is the inverse of h5type, providing a ReadRepresentation for an
# H5Datatype. We handle shared datatypes here: ones that were not "committed" by JLD2.
function jltype(f::JLDFile, sdt::SharedDatatype)
haskey(f.h5jltype, sdt) && return f.h5jltype[sdt]::ReadRepresentation
dt, attrs = read_shared_datatype(f, sdt)
rr = jltype(f, dt)
f.h5jltype[sdt] = rr
end



# Constructs a ReadRepresentation for a given opaque (bitstype) type
function constructrr(::JLDFile, T::DataType, dt::BasicDatatype, attrs::Vector{ReadAttribute})
Expand Down Expand Up @@ -381,7 +362,7 @@
# If the reference is to a committed datatype, read the datatype
nulldt = CommittedDatatype(UNDEFINED_ADDRESS, 0)
cdt = get(f.datatype_locations, ref, nulldt)
res = cdt !== nulldt ? (typeof(jltype(f, cdt)::ReadRepresentation)::DataType).parameters[1] : load_dataset(f, ref)
res = cdt !== nulldt ? eltype(jltype(f, cdt)) : load_dataset(f, ref)
unknown_params = unknown_params || isunknowntype(res) || isreconstructed(res)
res
end for ref in refs]
Expand Down
42 changes: 9 additions & 33 deletions src/datasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -66,48 +66,24 @@
filters::FilterPipeline=FilterPipeline(),
header_offset::RelOffset=NULL_REFERENCE,
attributes::Union{Vector{ReadAttribute},Nothing}=nothing)
# See if there is a julia type attribute
io = f.io
if dt isa SharedDatatype
# this means that it is "committed" to `_types` if the file was written by JLD2
rr = jltype(f, get(f.datatype_locations, dt.header_offset, dt))

if layout.data_offset == -1
# There was no layout message.
# That means, this dataset is just a datatype
# return the Datatype
return typeof(rr).parameters[1]
end

seek(io, layout.data_offset)
read_dataspace = (dataspace, header_offset, layout, filters)
read_data(f, rr, read_dataspace, attributes)

rr = jltype(f, dt)
if layout.data_offset == -1
# There was no layout message.
# That means, this dataset is just a datatype
return typeof(rr).parameters[1]

Check warning on line 73 in src/datasets.jl

View check run for this annotation

Codecov / codecov/patch

src/datasets.jl#L73

Added line #L73 was not covered by tests
elseif layout.data_offset == typemax(Int64)
rr = jltype(f, dt)
T,S = typeof(rr).parameters
T,_ = typeof(rr).parameters
if layout.data_length > -1
# TODO: this could use the fill value message to populate the array
@warn "This array should be populated by a fill value. This is not (yet) implemented."
end
v = Array{T, 1}()
track_weakref!(f, header_offset, v)
return v
else
dtt = dt
rr = jltype(f, dtt)

if layout.data_offset == -1
# There was no layout message.
# That means, this dataset is just a datatype
# return the Datatype
return typeof(rr).parameters[1]
end

seek(io, layout.data_offset)
read_dataspace = (dataspace, header_offset, layout, filters)
read_data(f, rr, read_dataspace, attributes)
end
seek(f.io, layout.data_offset)
read_dataspace = (dataspace, header_offset, layout, filters)
read_data(f, rr, read_dataspace, attributes)
end

# Most types can only be scalars or arrays
Expand Down
14 changes: 7 additions & 7 deletions src/datatypes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ struct BasicDatatype <: H5Datatype
end
define_packed(BasicDatatype)
StringDatatype(::Type{String}, size::Integer) =
BasicDatatype(DT_STRING, 0x11, 0x00, 0x00, size)
BasicDatatype(UInt8(DT_STRING) | 0x3<<4, 0x11, 0x00, 0x00, size)
OpaqueDatatype(size::Integer) =
BasicDatatype(DT_OPAQUE, 0x00, 0x00, 0x00, size) # XXX make sure ignoring the tag is OK
BasicDatatype(UInt8(DT_OPAQUE) | 0x3<<4, 0x00, 0x00, 0x00, size) # XXX make sure ignoring the tag is OK
ReferenceDatatype() =
BasicDatatype(DT_REFERENCE, 0x00, 0x00, 0x00, jlsizeof(RelOffset))
BasicDatatype(UInt8(DT_REFERENCE) | 0x3<<4, 0x00, 0x00, 0x00, jlsizeof(RelOffset))

function Base.:(==)(dt1::BasicDatatype, dt2::BasicDatatype)
ret = true
Expand Down Expand Up @@ -124,7 +124,7 @@ struct BitFieldDatatype <: H5Datatype
end
define_packed(BitFieldDatatype)
BitFieldDatatype(size) =
BitFieldDatatype(DT_BITFIELD, 0x00, 0x00, 0x00, size, 0, 8*size)
BitFieldDatatype(UInt8(DT_BITFIELD) | 0x3<<4, 0x00, 0x00, 0x00, size, 0, 8*size)


struct FloatingPointDatatype <: H5Datatype
Expand Down Expand Up @@ -198,7 +198,7 @@ end

function jlwrite(io::IO, dt::CompoundDatatype)
n = length(dt.names)
jlwrite(io, BasicDatatype(DT_COMPOUND, n % UInt8, (n >> 8) % UInt8, 0x00, dt.size))
jlwrite(io, BasicDatatype(UInt8(DT_COMPOUND) | 0x3<<4, n % UInt8, (n >> 8) % UInt8, 0x00, dt.size))
for i = 1:length(dt.names)
# Name
name = dt.names[i]
Expand Down Expand Up @@ -273,7 +273,7 @@ struct VariableLengthDatatype{T<:H5Datatype} <: H5Datatype
basetype::T
end
VariableLengthDatatype(basetype::H5Datatype) =
VariableLengthDatatype{typeof(basetype)}(DT_VARIABLE_LENGTH, 0x00, 0x00, 0x00, 8+jlsizeof(RelOffset), basetype)
VariableLengthDatatype{typeof(basetype)}(UInt8(DT_VARIABLE_LENGTH) | 0x3<<4, 0x00, 0x00, 0x00, 8+jlsizeof(RelOffset), basetype)
VariableLengthDatatype(class, bitfield1, bitfield2, bitfield3, size, basetype::H5Datatype) =
VariableLengthDatatype{typeof(basetype)}(class, bitfield1, bitfield2, bitfield3, size, basetype)

Expand All @@ -288,7 +288,7 @@ jlsizeof(dt::VariableLengthDatatype) =
jlsizeof(BasicDatatype) + jlsizeof(dt.basetype)

function jlwrite(io::IO, dt::VariableLengthDatatype)
jlwrite(io, BasicDatatype(DT_VARIABLE_LENGTH, dt.bitfield1, dt.bitfield2, dt.bitfield3, dt.size))
jlwrite(io, BasicDatatype(UInt8(DT_VARIABLE_LENGTH) | 0x3<<4, dt.bitfield1, dt.bitfield2, dt.bitfield3, dt.size))
jlwrite(io, dt.basetype)
end

Expand Down
65 changes: 53 additions & 12 deletions src/explicit_datasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@
DataLayout(f, dset.layout),
isnothing(dset.filters) ? FilterPipeline() : dset.filters,
dset.offset,
collect(values(dset.attributes)))
collect(ReadAttribute, values(dset.attributes)))
end

"""
Expand Down Expand Up @@ -392,11 +392,7 @@
iswritten(dset) || return false
f = dset.parent.f
dt = dset.datatype
if dt isa SharedDatatype
rr = jltype(f, get(f.datatype_locations, dt.header_offset, dt))
else
rr = jltype(f, dt)
end
rr = jltype(f, dt)
T = typeof(rr).parameters[1]
!(samelayout(T)) && return false
!isempty(dset.filters.filters) && return false
Expand Down Expand Up @@ -424,11 +420,7 @@

# figure out the element type
dt = dset.datatype
if dt isa SharedDatatype
rr = jltype(f, get(f.datatype_locations, dt.header_offset, dt))
else
rr = jltype(f, dt)
end
rr = jltype(f, dt)
T = typeof(rr).parameters[1]
ndims, offset = get_ndims_offset(f, ReadDataspace(f, dset.dataspace), collect(values(dset.attributes)))

Expand Down Expand Up @@ -518,4 +510,53 @@
end
return offset
end
end
end

# Array-like wrapper around a `Dataset` stored in a JLD2 file.
# It subtypes `AbstractArray{T, N}`; the indexing methods defined for it in
# this file seek into the file and read or write one element per access.
#
# Type parameters:
#   T   - element type exposed through the AbstractArray interface
#   N   - number of dimensions
#   ODR - second parameter of the `ReadRepresentation` stored in `rr`
#   io  - type parameter of the owning `JLDFile`
struct ArrayDataset{T, N, ODR, io} <: AbstractArray{T, N}
# File that owns the dataset; its `io` stream is used for all element access.
f::JLDFile{io}
# The wrapped dataset.
dset::Dataset
# Array dimensions; this is what `size` returns for the wrapper.
dims::NTuple{N, Int}
# Position passed to `seek` for the first element of the data.
data_address::Int64
# Read representation used to determine the per-element size and to decode
# elements when reading.
rr::ReadRepresentation{T, ODR}
end
# Build an `ArrayDataset` wrapper for `dset`.
#
# Throws `ArgumentError` if `dset` is not an array dataset and
# `UnsupportedFeatureException` if the dataset has compression filters.
function ArrayDataset(dset::Dataset)
    if !isarraydataset(dset)
        throw(ArgumentError("Dataset is not an array"))
    end
    if iscompressed(dset.filters)
        throw(UnsupportedFeatureException("Compressed datasets are not supported."))
    end

    f = dset.parent.f
    dt = dset.datatype

    # Outside of plain mode a shared datatype is first looked up in the
    # file's table of known datatype locations; otherwise it is used as is.
    resolved_dt = if !(f.plain) && dt isa SharedDatatype
        get(f.datatype_locations, dt.header_offset, dt)
    else
        dt
    end

    # The stored dimensions are reversed to obtain the Julia array size.
    # NOTE(review): presumably because the file stores them in row-major
    # order - confirm against the dataspace message definition.
    dims = Int.(reverse(dset.dataspace.dimensions))
    address = fileoffset(f, dset.layout.data_address)

    return ArrayDataset(f, dset, dims, address, jltype(f, resolved_dt))
end

# Return `true` if `dset` has a dataspace of type `DS_SIMPLE` or `DS_V1`,
# and `false` otherwise (including when the dataspace is missing or is not
# an `HmWrap{HmDataspace}`).
function isarraydataset(dset::Dataset)
    space = dset.dataspace
    # A missing dataspace, or one of any other kind, never describes an array.
    (isnothing(space) || !(space isa HmWrap{HmDataspace})) && return false
    return space.dataspace_type == DS_SIMPLE || space.dataspace_type == DS_V1
end

# `ArrayDataset` is addressed by a single linear index.
function Base.IndexStyle(::Type{<:ArrayDataset})
    return IndexLinear()
end

# The array size is the dimension tuple captured at construction time.
function Base.size(A::ArrayDataset)
    return A.dims
end

# Indexing a `Dataset` with one or more indices wraps it in an
# `ArrayDataset` and indexes into that wrapper.
function Base.getindex(dset::Dataset, I...)
    wrapped = ArrayDataset(dset)
    return wrapped[I...]
end

# Indexing a `Dataset` with no indices (`dset[]`) reads the whole dataset.
function Base.getindex(dset::Dataset)
    return read_dataset(dset)
end

# Assigning into a `Dataset` with at least one index forwards to the
# `ArrayDataset` wrapper.
function Base.setindex!(dset::Dataset, v, i, I...)
    wrapped = ArrayDataset(dset)
    return Base.setindex!(wrapped, v, i, I...)
end

# Read the element at linear index `i` directly from the file.
# Throws `BoundsError` (via `checkbounds`) when `i` is out of range.
function Base.getindex(A::ArrayDataset, i::Integer)
    @boundscheck checkbounds(A, i)
    # Element `i` lives `(i - 1)` element sizes past the start of the data.
    element_size = odr_sizeof(A.rr)
    pos = A.data_address + (i - 1) * element_size
    seek(A.f.io, pos)
    return read_scalar(A.f, A.rr, UNDEFINED_ADDRESS)
end

# Overwrite the element at linear index `i` in the file with `v`.
# Throws `BoundsError` (via `checkbounds`) when `i` is out of range and
# `ArgumentError` when the file is not writable. Returns `v`.
function Base.setindex!(A::ArrayDataset{T,N,ODR}, v, i::Integer) where {T,N,ODR}
    @boundscheck checkbounds(A, i)
    file = A.f
    if !file.writable
        throw(ArgumentError("Cannot edit in read-only mode"))
    end
    # Element `i` lives `(i - 1)` element sizes past the start of the data.
    pos = A.data_address + (i - 1) * odr_sizeof(A.rr)
    seek(file.io, pos)
    write_data(file.io, file, v, T, datamode(ODR), JLDWriteSession())
    return v
end
2 changes: 1 addition & 1 deletion src/macros_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ function linefun(ex)
increment = esc(n)
off_inc = :($offset += $increment)
write_inc = :(write_zerobytes(io, $increment))
return [write_inc, off_inc, off_inc]
return [write_inc, off_inc, :(skip($io, $increment))]
elseif @capture(ex, s_Symbol::T_) || @capture(ex, s_Symbol::T_ = v_)
getprop_ = :($(esc(s)) = $kw.$(s))
default = Symbol(s,"_default")
Expand Down
32 changes: 32 additions & 0 deletions test/dataset_api.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,36 @@ using JLD2, Test
end
@test load(fn)["d"] == zeros(1000,1000)
end
end

# Exercises indexed reads (`dset[...]`) and in-place updates
# (`dset[...] = ...`) on datasets obtained through `JLD2.get_dataset`.
@testset "Slicing & Updating" begin
# Work inside a fresh temporary directory so the test file does not
# collide with anything else.
cd(mktempdir()) do
fn = "test.jld2"
# a: scalar, b: 2x3 integer matrix, c: vector of (UInt8, Float32) tuples
jldsave(fn; a=42, b = [42 43 44; 45 46 47], c = [(0x00, 1f0), (0x42, 2f0)])
# File opened without a mode argument: reads work, and the assignment at
# the end of this block is expected to throw.
jldopen(fn) do f
dset = JLD2.get_dataset(f, "a")
# Empty brackets read the whole dataset.
@test dset[] == 42

dset = JLD2.get_dataset(f, "b")
@test dset[] == [42 43 44; 45 46 47]
# Linear, cartesian, range and strided indexing.
@test dset[1] == 42
@test dset[1,1] == 42
@test dset[1:2, 1:2] == [42 43; 45 46]
@test dset[1,1:2:3] == [42, 44]
# Out-of-range indices are rejected.
@test_throws BoundsError dset[7]
@test_throws BoundsError dset[2,4]
# Writing is rejected for a file opened this way.
@test_throws ArgumentError dset[1] = 1
end
# File reopened in "a" mode: the in-place edits below are expected to succeed.
jldopen(fn, "a") do f
dset = JLD2.get_dataset(f, "b")
# Linear index 2 addresses row 2, column 1 (see the expected matrix).
dset[2] = -1
@test dset[] == [42 43 44; -1 46 47]
# Strided assignment into the first row.
dset[1,1:2:3] = [1,5]
@test dset[] == [1 43 5; -1 46 47]

# Elements of a tuple-valued array can be replaced as well; the change
# must be visible through the regular `f["c"]` read path.
dset = JLD2.get_dataset(f, "c")
dset[2] = (0xff, 0f0)
@test f["c"] == [(0x00, 1f0), (0xff, 0f0)]
end
end
end
Loading