Skip to content

Commit

Permalink
Add more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
asinghvi17 committed Sep 14, 2024
1 parent e10d21f commit f57bea1
Show file tree
Hide file tree
Showing 20 changed files with 117 additions and 17 deletions.
25 changes: 25 additions & 0 deletions test/corrections.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
using JSON3, Kerchunk, Test, Zarr

zarray_sst = "{\n \"chunks\": [\n 226,\n 226\n ],\n \"compressor\": {\n \"id\": \"zlib\",\n \"level\": 1\n },\n \"dtype\": \"<i2\",\n \"fill_value\": -1,\n \"filters\": null,\n \"order\": \"C\",\n \"shape\": [\n 5424,\n 5424\n ],\n \"zarr_format\": 2\n}"

zattrs_sst = "{\n \"_ARRAY_DIMENSIONS\": [\n \"y\",\n \"x\"\n ],\n \"_FillValue\": -1,\n \"_Netcdf4Dimid\": 0,\n \"_Unsigned\": \"true\",\n \"add_offset\": 180.0,\n \"algorithm_type\": \"regression\",\n \"ancillary_variables\": \"DQF\",\n \"cell_methods\": \"retrieval_local_zenith_angle: point (good or degraded quality pixel produced) quantitative_local_zenith_angle: point (good quality pixel produced) retrieval_solar_zenith_angle: point (good quality pixel produced) t: point area: point\",\n \"coordinates\": \"retrieval_local_zenith_angle quantitative_local_zenith_angle retrieval_solar_zenith_angle t y x\",\n \"grid_mapping\": \"goes_imager_projection\",\n \"long_name\": \"ABI L2+ Sea Surface (Skin) Temperature\",\n \"resolution\": \"y: 0.000056 rad x: 0.000056 rad\",\n \"scale_factor\": 0.0024416300002485514,\n \"standard_name\": \"sea_surface_skin_temperature\",\n \"units\": \"K\",\n \"valid_range\": [\n 0,\n -6\n ]\n}"


@testset "CF scale/offset/mask" begin
    # Parse the raw .zarray / .zattrs JSON strings into mutable Dicts.
    # Zarr.JSON is used (rather than JSON3) because the correction function
    # below mutates these dictionaries in place.
    zarray = Zarr.JSON.parse(zarray_sst)
    zattrs = Zarr.JSON.parse(zattrs_sst)

    # The attrs declare `_Unsigned: "true"`, so the stored Int16 fill value
    # must be reinterpreted as UInt16 before scale/offset are applied.
    old_correct_fillvalue = reinterpret(UInt16, Int16(zattrs["_FillValue"]))
    old_scalefactor = zattrs["scale_factor"]
    old_offset = zattrs["add_offset"]

    # Apply the CF scale/offset/mask corrections to the array metadata.
    Kerchunk.add_scale_offset_filter_and_set_mask!(zarray, zattrs)

    # Both a fixed-scale-offset filter and an astype filter should now be
    # present in the filter chain.
    @test "fixedscaleoffset" in getindex.(zarray["filters"], "id")
    @test "astype" in getindex.(zarray["filters"], "id")

    # The fill value must be translated through the same scale/offset
    # transform that the data itself will undergo.
    @test zarray["fill_value"] == old_correct_fillvalue * old_scalefactor + old_offset
end
File renamed without changes.
1 change: 1 addition & 0 deletions test/data/mur_sst.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions test/data/test.zarr/.zgroup
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"zarr_format":2}
1 change: 1 addition & 0 deletions test/data/test.zarr/ti/.zarray
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"chunks":[13],"compressor":null,"dtype":"<f8","fill_value":null,"filters":null,"order":"C","shape":[13],"zarr_format":2}
1 change: 1 addition & 0 deletions test/data/test.zarr/ti/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"_ARRAY_DIMENSIONS":["ti"],"axis":"T","standard_name":"time","units":"days since 1900-01-01 00:00:00"}
Binary file added test/data/test.zarr/ti/0
Binary file not shown.
1 change: 1 addition & 0 deletions test/data/test.zarr/unnamed/.zarray
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"chunks":[13,146,100],"compressor":null,"dtype":"<f8","fill_value":null,"filters":null,"order":"C","shape":[13,146,100],"zarr_format":2}
1 change: 1 addition & 0 deletions test/data/test.zarr/unnamed/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"_ARRAY_DIMENSIONS":["ti","y","x"]}
Binary file added test/data/test.zarr/unnamed/0.0.0
Binary file not shown.
1 change: 1 addition & 0 deletions test/data/test.zarr/x/.zarray
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"chunks":[100],"compressor":null,"dtype":"<i8","fill_value":null,"filters":null,"order":"C","shape":[100],"zarr_format":2}
1 change: 1 addition & 0 deletions test/data/test.zarr/x/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"_ARRAY_DIMENSIONS":["x"],"axis":"X"}
Binary file added test/data/test.zarr/x/0
Binary file not shown.
1 change: 1 addition & 0 deletions test/data/test.zarr/y/.zarray
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"chunks":[146],"compressor":null,"dtype":"<i8","fill_value":null,"filters":null,"order":"C","shape":[146],"zarr_format":2}
1 change: 1 addition & 0 deletions test/data/test.zarr/y/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"_ARRAY_DIMENSIONS":["y"],"axis":"Y"}
Binary file added test/data/test.zarr/y/0
Binary file not shown.
5 changes: 1 addition & 4 deletions test/its_live_catalog.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,9 @@ using JSON3, Kerchunk, Zarr, YAXArrays
using Test

@testset "ITS_LIVE catalog" begin
    # Load the sample catalog shipped in the package's test/data directory.
    # Read the file to a string first so no file handle is left open
    # (JSON3.read(open(path)) would leak the handle).
    catalog_path = joinpath(dirname(dirname(pathof(Kerchunk))), "test", "data", "its_live_catalog.json")
    catalog_json = JSON3.read(read(catalog_path, String))
    # Any entry will do; `first(keys(...))` picks one deterministically.
    arbitrary_choice_dictionary = catalog_json[first(keys(catalog_json))]
    st = Kerchunk.ReferenceStore(arbitrary_choice_dictionary)
    za = Zarr.zopen(st)
    @test_nowarn za["vx"][1, 1] # test that reading works
end

# test ICESAT2 data
# p"s3://mymdtemp/icesat2-4.01.json"
24 changes: 13 additions & 11 deletions test/python_local_kerchunk.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,34 +24,36 @@ using Test
CondaPkg.withenv() do
run(```
$(CondaPkg.which("python")) -c "
import kerchunk
import kerchunk.hdf as hdf; import os; import ujson
h5chunks = hdf.SingleHdf5ToZarr('test.nc', inline_threshold=300)
with open('test.json', 'w') as f:
f.write(ujson.dumps(h5chunks.translate()))
"
```) # strange indenting because I had weird Python indentation issues when there were spaces...
```) # strange indenting because I had weird Python indentation issues when there were spaces...
end

py_kerchunk_catalog = JSON3.read(read("test.json", String))

st = Kerchunk.ReferenceStore("test.json")
st2 = Kerchunk.ReferenceStore(py_kerchunk_catalog)

#=
# explore why fsspec might be causing problems
fs, = fsspec.core.url_to_fs("s3://its-live-data/datacubes/v2/N00E020/ITS_LIVE_vel_EPSG32735_G0120_X750000_Y10050000.zarr")
fs2, = fsspec.core.url_to_fs("reference://"; fo = py_kerchunk_catalog)
st.mapper.dirfs.ls("/")
=#
st1 = Kerchunk.ReferenceStore("test.json") # read from the kerchunk
st2 = Kerchunk.ReferenceStore(py_kerchunk_catalog) # in-memory

# ds = xr.open_dataset("reference://", engine="zarr", backend_kwargs={"consolidated": False, "storage_options": {"fo" : h5chunks.translate()}})

ds = Zarr.zopen(st; consolidated = false)
ds = Zarr.zopen(st1; consolidated = false)

ya = YAXArrays.open_dataset(ds)

@test all(map(==, ya["unnamed"] |> collect, ras |> collect)) # if not, this goes to YAXArrays

# Mutate the store by translating some CF standards to Zarr
Kerchunk.apply_cf_corrections!(st1)
# Now, try again.
ds = Zarr.zopen(st1; consolidated = false)

@test all(map(==, ds["unnamed"] |> collect, ras |> collect)) # if not, this goes to YAXArrays


ds = Zarr.zopen(st2; consolidated = false)

ya = YAXArrays.open_dataset(ds)
Expand Down
56 changes: 55 additions & 1 deletion test/real_zarr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

using Zarr, Kerchunk

#=
# Use the ITS_LIVE data as an example. Get each of its subkeys using S3,
# and create a JSON file that looks like a Kerchunk catalog.
Expand All @@ -23,4 +24,57 @@ function _mockup_kerchunk(file_location, bucket, path_prefix)
end
println(f, "}")
end
end
end
=#

# A read-only dictionary view of a directory tree: keys are file paths
# relative to `directory`, and each value is a 1-tuple containing the full
# path to that file — presumably the shape Kerchunk's ReferenceStore expects
# for local-file references (see its use below with `"refs" => dd`).
# NOTE(review): only the methods defined below are implemented; generic
# AbstractDict iteration (`Base.iterate`) is not, so many Base fallbacks on
# AbstractDict will not work on this type.
struct DirectoryDict{PathType} <: AbstractDict{String, Tuple{String}}
    directory::PathType # root of the tree; any path-like type
end

# Collect every file path under the root, expressed relative to the root.
# Files directly in the root keep their bare names; deeper files get a
# normalized relative-directory prefix. Empty subdirectories contribute
# no keys, since only `files` entries are collected.
function Base.keys(d::DirectoryDict)
    collected = String[]
    for (rootpath, _dirs, files) in walkdir(d.directory)
        if rootpath == d.directory
            append!(collected, files)
        else
            prefix = normpath(relpath(rootpath, d.directory))
            append!(collected, joinpath.((prefix,), files))
        end
    end
    return collected
end

# Materialize the value (a 1-tuple full path) for every key in the store.
Base.values(d::DirectoryDict) = [d[k] for k in keys(d)]

# Pair each relative key with its full-path value; one directory walk.
function Base.pairs(d::DirectoryDict)
    return map(k -> k => d[k], keys(d))
end

# Keep only the key => value pairs for which `f(pair)` is true,
# returned as an eager Dict (the original store is left untouched).
function Base.filter(f, d::DirectoryDict)
    kept = Dict{String, Tuple{String}}()
    for key in keys(d)
        entry = key => d[key]
        f(entry) && push!(kept, entry)
    end
    return kept
end

# A key exists precisely when the corresponding file exists under the root.
Base.haskey(d::DirectoryDict, k::String) = isfile(joinpath(d.directory, k))

# Values are 1-tuples holding the full path to the keyed file.
# Note: lookup does not check existence; see `haskey` for that.
Base.getindex(d::DirectoryDict, k::String) = (joinpath(d.directory, k),)

# Total number of files in the tree (one key per file; directories don't count).
function Base.length(d::DirectoryDict)
    nfiles = 0
    for (_root, _dirs, files) in walkdir(d.directory)
        nfiles += length(files)
    end
    return nfiles
end


# Build a Kerchunk v1-style reference dict backed by the on-disk test Zarr
# store, using DirectoryDict as a lazy "refs" mapping.
dd = DirectoryDict(joinpath(pathof(Kerchunk) |> dirname |> dirname, "test", "data", "test.zarr"))
# Qualify ReferenceStore for consistency with the other call sites in the
# test suite (e.g. Kerchunk.ReferenceStore in python_local_kerchunk.jl).
st = Kerchunk.ReferenceStore(Dict("version" => "1", "refs" => dd))
# Opening the store should not warn; then open it for real.
@test_nowarn Zarr.zopen(st)
zg = Zarr.zopen(st)
# All four arrays from the test store should be discovered, and no extras.
@test isempty(setdiff(keys(zg.arrays), ("unnamed", "ti", "x", "y")))
@test_nowarn collect(zg["unnamed"])
@test_nowarn collect(zg["ti"])
# TODO: these are broken?!
@test_broken collect(zg["x"])
@test_broken collect(zg["y"])
14 changes: 13 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,17 @@ using Test
@static if !(Sys.iswindows())
include("python_local_kerchunk.jl")
end
include("its_live_catalog.jl")
if ( false ) # an ode to GEMB :P
# In all seriousness, this will only be possible to test when:
# - HTTPPaths are a thing
# - we update this catalog to switch all URLs from the s3 protocol to the new ITS_LIVE HTTP
# protocol
# The bucket you are attempting to access must be addressed using the specified endpoint.
# Please send all future requests to this endpoint.
# `its-live-data.s3-us-west-2.amazonaws.com`
include("its_live_catalog.jl")
end
include("corrections.jl")
include("real_zarr.jl")
end
end

0 comments on commit f57bea1

Please sign in to comment.