Skip to content

Commit

Permalink
fill value and write support
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexander-Barth committed Feb 18, 2024
1 parent 10f96ce commit f8a6e0d
Show file tree
Hide file tree
Showing 9 changed files with 163 additions and 27 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ version = "0.1.0"
CommonDataModel = "1fbeeb36-5f17-413c-809b-666fb144f157"
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99"

[compat]
Expand Down
3 changes: 2 additions & 1 deletion docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Modules = [ZarrDatasets]

### Differences between Zarr and NetCDF files

* All metadata is stored in JSON files for Zarr with the following implications:
* All metadata (in particular attributes) is stored in JSON files for the Zarr format with the following implications:
* JSON does not distinguish between integers and real numbers. They are all considered as generic numbers. Whole numbers are loaded as `Int64` and decimal numbers as `Float64`. It is not possible to store the number `1.0` as a real number.
* The order of keys in a JSON document is undefined. It is therefore not possible to have a consistent ordering of the attributes or variables.
* The JSON standard does not allow NaN, +Inf, -Inf (https://github.com/capnproto/capnproto/issues/261).
4 changes: 4 additions & 0 deletions src/ZarrDatasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ import CommonDataModel:
attrib,
attribnames,
dataset,
defAttrib,
defVar,
defDim,
dim,
dimnames,
iswritable,
Expand All @@ -29,6 +32,7 @@ import DiskArrays:
import CommonDataModel as CDM
using DataStructures
using Zarr
import JSON

include("types.jl")
include("dataset.jl")
Expand Down
70 changes: 47 additions & 23 deletions src/dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,7 @@ function CDM.variable(ds::ZarrDataset,varname::SymbolOrString)
ZarrVariable{eltype(zarray),ndims(zarray),typeof(zarray),typeof(ds)}(zarray,ds)
end

CDM.dimnames(ds::ZarrDataset) = Tuple(
sort(
unique(
reduce(vcat,
(collect(dimnames(variable(ds,vn))) for vn in keys(ds)),
init = String[]
))))
# Names of all dimensions recorded in `ds.dimensions`, returned as a tuple of
# `String`s in the iteration order of that dictionary.
function CDM.dimnames(ds::ZarrDataset)
    return Tuple(String(dn) for dn in keys(ds.dimensions))
end

# function CDM.unlimited(ds::ZarrDataset)
# ul = ds.unlimited
Expand All @@ -33,31 +27,35 @@ CDM.dimnames(ds::ZarrDataset) = Tuple(
# return nothing
# end

function CDM.dim(ds::ZarrDataset,dimname::SymbolOrString)
# Length of the dimension `dimname`, looked up in `ds.dimensions`.
# The dictionary is keyed by `Symbol`, so the name is converted first;
# an unknown name raises the dictionary's `KeyError`.
function CDM.dim(ds::ZarrDataset, dimname::SymbolOrString)
    key = Symbol(dimname)
    return ds.dimensions[key]
end

for vn in keys(ds)
v = variable(ds,vn)
dn = dimnames(v)
i = findfirst(==(dimname),dn)
if !isnothing(i)
return size(v,i)
end
end
error("dimension $dimname not found")
# Register a new dimension `dimname` of length `dimlen` in `ds.dimensions`.
# Defining a name that already exists trips the assertion.
# Returns `dimlen`.
function CDM.defDim(ds::ZarrDataset, dimname::SymbolOrString, dimlen)
    dn = Symbol(dimname)
    @assert !haskey(ds.dimensions,dn)
    setindex!(ds.dimensions, dimlen, dn)
    return dimlen
end

# Names of the arrays (variables) held by the underlying Zarr group.
function CDM.varnames(ds::ZarrDataset)
    return keys(ds.zgroup.arrays)
end

# Global attributes are the attributes of the underlying Zarr group.
function CDM.attribnames(ds::ZarrDataset)
    return keys(ds.zgroup.attrs)
end

# Value of the global attribute `name` (attribute keys are `String`s).
function CDM.attrib(ds::ZarrDataset, name::SymbolOrString)
    return ds.zgroup.attrs[String(name)]
end

# Set the global attribute `name` to `value` on a writable dataset.
# The in-memory attribute dictionary of the Zarr group is updated, then the
# whole dictionary is serialized as JSON and written to the group's ".zattrs"
# entry in the store.
function CDM.defAttrib(ds::ZarrDataset, name::SymbolOrString, value)
    @assert iswritable(ds)

    zg = ds.zgroup
    zg.attrs[String(name)] = value

    # Re-serialize all attributes; ".zattrs" is rewritten in full.
    buf = IOBuffer()
    JSON.print(buf, zg.attrs)
    return zg.storage[zg.path, ".zattrs"] = take!(buf)
end

# Names of the child groups of the underlying Zarr group.
function CDM.groupnames(ds::ZarrDataset)
    return keys(ds.zgroup.groups)
end

# Build a dataset for the child group `name`. `ds` is passed as the last
# argument (presumably recorded as the parent dataset; that constructor
# method is not visible here).
function CDM.group(ds::ZarrDataset, name::SymbolOrString)
    return ZarrDataset(ds.zgroup.groups, String(name), ds)
end


CDM.parentdataset(ds::ZarrDataset) = ds.parentdataset
CDM.iswritable(ds::ZarrDataset) = false
CDM.iswritable(ds::ZarrDataset) = ds.iswritable
CDM.maskingvalue(ds::ZarrDataset) = ds.maskingvalue


Expand Down Expand Up @@ -108,12 +106,38 @@ end # implicit call to close(ds)
function ZarrDataset(url::AbstractString,mode = "r";
parentdataset = nothing,
_omitcode = 404,
maskingvalue = missing)
ds = Zarr.zopen(url,mode)
if ds.storage isa Zarr.HTTPStore
Zarr.missing_chunk_return_code!(ds.storage,_omitcode)
maskingvalue = missing,
attrib = Dict(),
)

dimensions = OrderedDict{Symbol,Int}()
iswritable = false

if mode == "r"
zg = Zarr.zopen(url,mode)
if (zg.storage isa Zarr.HTTPStore) ||
(zg.storage isa Zarr.ConsolidatedStore{Zarr.HTTPStore})
@debug "omit chunks on HTTP error" _omitcode
Zarr.missing_chunk_return_code!(zg.storage,_omitcode)
end

for (varname,zarray) in zg.arrays
for (dimname,dimlen) in zip(reverse(zarray.attrs["_ARRAY_DIMENSIONS"]),size(zarray))

dn = Symbol(dimname)
if haskey(dimensions,dn)
@assert dimensions[dn] == dimlen
else
dimensions[dn] = dimlen
end
end
end
elseif mode == "c"
store = Zarr.DirectoryStore(url)
zg = zgroup(store, "",attrs = Dict(attrib))
iswritable = true
end
ZarrDataset(ds,parentdataset,maskingvalue)
ZarrDataset(zg,parentdataset,dimensions,iswritable,maskingvalue)
end


Expand Down
2 changes: 2 additions & 0 deletions src/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,7 @@ end
# Dataset implementing the CommonDataModel interface on top of a Zarr group.
# The fields are filled positionally by the constructors, so their order
# matters.
struct ZarrDataset{TZ,TP,Tmaskingvalue} <: CDM.AbstractDataset
# underlying Zarr group (its arrays, attributes, sub-groups and storage)
zgroup::TZ
# parent dataset, or `nothing` when none was given
parentdataset::TP
# dimension name => dimension length
dimensions::OrderedDict{Symbol,Int}
# whether definitions (attributes, variables) are allowed on this dataset
iswritable::Bool
# value returned by `CDM.maskingvalue(ds)` (the constructor default is `missing`)
maskingvalue::Tmaskingvalue
end
53 changes: 51 additions & 2 deletions src/variable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,61 @@ CDM.name(v::ZarrVariable) = Zarr.zname(v.zarray)
# Dimension names of the variable. They are read from the
# "_ARRAY_DIMENSIONS" attribute of the Zarr array and returned in reversed
# order as a tuple.
function CDM.dimnames(v::ZarrVariable)
    stored_order = v.zarray.attrs["_ARRAY_DIMENSIONS"]
    return Tuple(reverse(stored_order))
end

# Dataset this variable belongs to.
function CDM.dataset(v::ZarrVariable)
    return v.parentdataset
end

CDM.attribnames(v::ZarrVariable) = filter(!=("_ARRAY_DIMENSIONS"),keys(v.zarray.attrs))
CDM.attrib(v::ZarrVariable,name::SymbolOrString) = v.zarray.attrs[String(name)]
# Attribute names of the variable.
# The internal "_ARRAY_DIMENSIONS" entry is hidden, and "_FillValue" is added
# whenever the Zarr array metadata defines a fill value.
function CDM.attribnames(v::ZarrVariable)
    zarray = v.zarray
    attnames = filter(!=("_ARRAY_DIMENSIONS"), keys(zarray.attrs))
    isnothing(zarray.metadata.fill_value) || push!(attnames, "_FillValue")
    return attnames
end

# Value of the variable attribute `name`.
# "_FillValue" is served from the Zarr array metadata when a fill value is
# defined there; every other name (and "_FillValue" when the metadata has
# none) is looked up in the array's attribute dictionary.
function CDM.attrib(v::ZarrVariable, name::SymbolOrString)
    key = String(name)
    if key == "_FillValue"
        fv = v.zarray.metadata.fill_value
        isnothing(fv) || return fv
    end
    return v.zarray.attrs[key]
end

# Set the attribute `name` of variable `v` to `value`.
# The parent dataset must be writable. "_FillValue" is rejected here: it is
# read from the array metadata (`v.zarray.metadata.fill_value`), not from the
# attribute dictionary.
# The updated attribute dictionary is serialized as JSON and written to the
# array's ".zattrs" entry in the store.
function CDM.defAttrib(v::ZarrVariable, name::SymbolOrString, value)
    @assert iswritable(dataset(v))
    @assert String(name) !== "_FillValue"

    zarray = v.zarray
    zarray.attrs[String(name)] = value

    # Re-serialize all attributes; ".zattrs" is rewritten in full.
    buf = IOBuffer()
    JSON.print(buf, zarray.attrs)
    return zarray.storage[zarray.path, ".zattrs"] = take!(buf)
end


# DiskArray methods
# Chunk queries on a ZarrVariable are answered by the wrapped Zarr array.
function eachchunk(v::ZarrVariable)
    return eachchunk(v.zarray)
end

function haschunks(v::ZarrVariable)
    return haschunks(v.zarray)
end

# A CFVariable wrapping a ZarrVariable forwards to the wrapped variable `v.var`.
function eachchunk(v::CFVariable{T,N,<:ZarrVariable}) where {T,N}
    return eachchunk(v.var)
end

function haschunks(v::CFVariable{T,N,<:ZarrVariable}) where {T,N}
    return haschunks(v.var)
end


# Create a new variable `name` with element type `vtype` in the writable
# dataset `ds`. `dimensionnames` lists dimensions that were previously defined
# in `ds.dimensions` (e.g. with `defDim`); their lengths give the array size.
#
# Keywords:
# * `chunksizes`: chunk shape handed to Zarr; when `nothing`, the full array
#   size is used.
# * `attrib`: variable attributes, any iterable of `key => value` pairs.
# * all remaining keyword arguments are forwarded to `zcreate`.
#
# Returns the `ZarrVariable` wrapping the newly created Zarr array.
# An undefined dimension name raises the `KeyError` of `ds.dimensions`.
function CDM.defVar(ds::ZarrDataset, name::SymbolOrString, vtype::DataType, dimensionnames;
                    chunksizes = nothing, attrib = Dict(), kwargs...)
    @assert iswritable(ds)

    # Copy the attributes into a Dict{String,Any}. `Dict(attrib)` would keep
    # the key/value types of the input, so e.g. a Dict{String,String} (or a
    # Symbol-keyed Dict) could not hold the "_ARRAY_DIMENSIONS" entry below.
    _attrib = Dict{String,Any}(string(k) => val for (k, val) in attrib)
    # Stored in reversed order; `CDM.dimnames(::ZarrVariable)` reverses it
    # again when reading.
    _attrib["_ARRAY_DIMENSIONS"] = reverse(dimensionnames)

    _size = ntuple(length(dimensionnames)) do i
        ds.dimensions[Symbol(dimensionnames[i])]
    end

    if isnothing(chunksizes)
        chunksizes = _size
    end

    # `name` may be a Symbol; hand a String to Zarr.
    zarray = zcreate(
        vtype, ds.zgroup, String(name), _size...;
        chunks = chunksizes,
        attrs = _attrib,
        kwargs...
    )

    return ZarrVariable{vtype,ndims(zarray),typeof(zarray),typeof(ds)}(zarray, ds)
end
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ using ZarrDatasets
@testset "ZarrDatasets.jl" begin
include("test_cdm.jl")
include("test_multifile.jl")
include("test_write.jl")
end
7 changes: 6 additions & 1 deletion test/test_cdm.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
using CommonDataModel: iswritable, attribnames, parentdataset, load!, dataset
using CommonDataModel:
attribnames,
dataset,
iswritable,
load!,
parentdataset
using Dates
using DiskArrays
using NCDatasets
Expand Down
49 changes: 49 additions & 0 deletions test/test_write.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
using ZarrDatasets
using ZarrDatasets:
defDim,
defVar,
defAttrib
using Zarr
using DataStructures

# Round-trip test: create a Zarr dataset on disk, write a variable and
# attributes, reopen it read-only and check that everything can be read back.

# reference data, 3 x 5 to match the "lon" and "lat" dimensions defined below
data = rand(Int32,3,5)

# create the dataset in a fresh temporary directory (mode "c")
fname = tempname()
mkdir(fname)
gattrib = Dict{String,Any}("title" => "this is the title")
ds = ZarrDataset(fname,"c",attrib = gattrib)

defDim(ds,"lon",3)
defDim(ds,"lat",5)

# variable attributes; the value type is `Any`
attrib = Dict{String,Any}(
"units" => "m/s",
"long_name" => "test",
)


varname = "var2"
dimensionnames = ("lon","lat")
vtype = Int32

# define the variable, write the data and add attributes after creation
# (on the variable and on the dataset)
zv = defVar(ds,varname,vtype,dimensionnames, attrib = attrib)
zv[:,:] = data
zv.attrib["lala"] = 12
zv.attrib["standard_name"] = "test"
ds.attrib["history"] = "test"
close(ds)

# reopen with the default mode ("r")
ds = ZarrDataset(fname)

zv = ds[varname]

# attributes added after creation must have been persisted
@test zv.attrib["lala"] == 12
@test zv.attrib["standard_name"] == "test"
@test ds.attrib["history"] == "test"

@test zv[:,:] == data

# the textual representation of the dataset is expected to contain "Global"
io = IOBuffer()
show(io,ds)
str = String(take!(io))
@test occursin("Global",str)

0 comments on commit f8a6e0d

Please sign in to comment.