Skip to content

Commit

Permalink
Merge pull request #15 from JuliaGeo/refactor-cf
Browse files Browse the repository at this point in the history
Refactor CF convention code and other code from NCDatasets in CommonDataSet
  • Loading branch information
Alexander-Barth committed Nov 14, 2023
2 parents d51ad82 + 5c582d2 commit 03c367f
Show file tree
Hide file tree
Showing 11 changed files with 505 additions and 68 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ authors = ["Alexander Barth <[email protected]>"]
keywords = ["netcdf", "GRIB", "climate and forecast conventions", "oceanography", "meteorology", "climatology", "opendap"]
license = "MIT"
desc = "CommonDataModel is a module that defines types common to NetCDF and GRIB data"
version = "0.2.5"
version = "0.2.6"

[deps]
CFTime = "179af706-886a-5703-950a-314cd64e0468"
Expand Down
57 changes: 3 additions & 54 deletions src/CommonDataModel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,68 +4,17 @@ using CFTime
using Dates
using Printf
using Preferences
import Base: isopen, show, display, close
import Base: isopen, show, display, close, filter
using DataStructures

"""
`AbstractDataset` is a collection of multidimensional variables (for example a
NetCDF or GRIB file)
A data set `ds` of a type derived from `AbstractDataset` should implemented at minimum:
* `Base.key(ds)`: return a list of variable names as strings
* `variable(ds,varname::String)`: return an array-like data structure (derived from `AbstractVariable`) of the variables corresponding to `varname`. This array-like data structure should follow the CF semantics.
* `dimnames(ds)`: should be an iterable with all dimension names in the data set `ds`
* `dim(ds,name)`: dimension value corresponding to name
Optionally a data set can have attributes and groups:
* `attribnames(ds)`: should be an iterable with all attribute names
* `attrib(ds,name)`: attribute value corresponding to name
* `groupnames(ds)`: should be an iterable with all group names
* `group(ds,name)`: group corresponding to the name
For a writable-dataset, one should also implement:
* `defDim`: define a dimension
* `defAttrib`: define a attribute
* `defVar`: define a variable
* `defGroup`: define a group
"""
abstract type AbstractDataset
end


"""
`AbstractVariable{T,N}` is a subclass of `AbstractArray{T, N}`. A variable `v` of a type derived from `AbstractVariable` should implement:
* `name(v)`: should be the name of variable within the data set
* `dimnames(v)`: should be a iterable data structure with all dimension names of the variable `v`
* `dataset(v)`: the parent dataset containing `v`
* `Base.size(v)`: the size of the variable
* `Base.getindex(v,indices...)`: get the data of `v` at the provided indices
Optionally a variable can have attributes:
* `attribnames(v)`: should be an iterable with all attribute names
* `attrib(v,name)`: attribute value corresponding to name
For a writable-dataset, one should also implement:
* `defAttrib`: define a attribute
* `Base.setindex(v,data,indices...)`: set the data in `v` at the provided indices
"""
abstract type AbstractVariable{T,N} <: AbstractArray{T, N}
end


# Type alias accepting either a `Symbol` or any string type; used to annotate
# name arguments (e.g. variable names used for indexing).
const SymbolOrString = Union{Symbol, AbstractString}

include("types.jl")
include("dataset.jl")
include("variable.jl")
include("cfvariable.jl")
include("attribute.jl")
include("dimension.jl")
include("cfconventions.jl")

end # module CommonDataModel

Expand Down
45 changes: 45 additions & 0 deletions src/attribute.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,51 @@ function show_attrib(io,a)
print(io,"\n")
end
catch err
@debug "error while printing" err
print(io,"Dataset attributes (file closed)")
end
end


"""
Base.keys(a::Attributes)
Return a list of the names of all attributes.
"""
Base.keys(a::Attributes) = attribnames(a.ds)


"""
getindex(a::Attributes,name::SymbolOrString)
Return the value of the attribute called `name` from the
attribute list `a`. Generally the attributes are loaded by
indexing, for example:
```julia
using NCDatasets
ds = NCDataset("file.nc")
title = ds.attrib["title"]
```
"""
Base.getindex(a::Attributes,name) = attrib(a.ds,name)


"""
Base.setindex!(a::Attributes,data,name::SymbolOrString)
Set the attribute called `name` to the value `data` in the
attribute list `a`. `data` can be a vector or a scalar. A scalar
is handeld as a vector with one element in the NetCDF data model.
Generally the attributes are defined by indexing, for example:
```julia
ds = NCDataset("file.nc","c")
ds.attrib["title"] = "my title"
close(ds)
```
"""
Base.setindex!(a::Attributes,data,name) = defAttrib(a.ds,name,data)

# Display an attribute list on `io` by delegating to `show_attrib`.
function Base.show(io::IO, a::Attributes)
    return show_attrib(io, a)
end
187 changes: 187 additions & 0 deletions src/cfconventions.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@

# Convenience constructor: build a `CFStdName` from a string by converting it
# to a `Symbol` first.
function CFStdName(n::AbstractString)
    return CFStdName(Symbol(n))
end

# String macro: builds a `CFStdName` from the literal text `n` when the macro
# is expanded.
macro CF_str(n)
    return CFStdName(n)
end

import Base.string
# String representation of a `CFStdName`: the string form of its `name` field.
function Base.string(n::CFStdName)
    return string(n.name)
end


"""
ncvar = NCDatasets.ancillaryvariables(ncv::NCDatasets.CFVariable,modifier)
Return the first ancillary variables from the NetCDF variable `ncv` with the
standard name modifier `modifier`. It can be used for example to access
related variable like status flags.
"""
function ancillaryvariables(ncv::CFVariable,modifier)
ds = dataset(ncv)
varname = name(ncv)

if !haskey(ncv.attrib,"ancillary_variables")
return nothing
end

ancillary_variables = split(ncv.attrib["ancillary_variables"])

for ancillary_variable in ancillary_variables
ncv_ancillary = ds[ancillary_variable]
if occursin(modifier,ncv_ancillary.attrib["standard_name"])
@debug ancillary_variable
return ncv_ancillary
end
end

# nothing found
return nothing
end


# Convert `x` to an array whose element type additionally allows `missing`.
function allowmissing(x::AbstractArray{T}) where {T}
    return convert(AbstractArray{Union{T,Missing}}, x)
end

"""
data = filter(ncv, indices...; accepted_status_flags = nothing)
Load and filter observations by replacing all variables without an acepted status
flag to `missing`. It is used the attribute `ancillary_variables` to identify
the status flag.
```
# da["data"] is 2D matrix
good_data = NCDatasets.filter(ds["data"],:,:, accepted_status_flags = ["good_data","probably_good_data"])
```
"""
function filter(ncv::AbstractVariable, indices...; accepted_status_flags = nothing)
#function filter_(ncv, indices...)
# accepted_status_flags = ("good_value", "probably_good_value")
data = allowmissing(ncv[indices...])

if (accepted_status_flags != nothing)
ncv_ancillary = ancillaryvariables(ncv,"status_flag");
if ncv_ancillary == nothing
error("no variable with the attribute status_flag as standard_name among $(ancillary_variables) found")
end

flag_values = ncv_ancillary.attrib["flag_values"]
flag_meanings = ncv_ancillary.attrib["flag_meanings"]::String
if typeof(flag_meanings) <: AbstractString
flag_meanings = split(flag_meanings)
end

accepted_status_flag_values = zeros(eltype(flag_values),length(accepted_status_flags))
for i = eachindex(accepted_status_flags,accepted_status_flag_values)
tmp = findfirst(accepted_status_flags[i] .== flag_meanings)

if tmp == nothing
error("cannot recognise flag $(accepted_status_flags[i])")
end
accepted_status_flag_values[i] = flag_values[tmp]
end
#@debug accepted_status_flag_values

dataflag = ncv_ancillary.var[indices...];
for i in eachindex(data)
good = false;
for accepted_status_flag_value in accepted_status_flag_values
good = good || (dataflag[i] .== accepted_status_flag_value)
end
if !good
#@show i,dataflag[i]
data[i] = missing
end
end
end

return data
end


"""
cv = coord(v::Union{CFVariable,Variable},standard_name)
Find the coordinate of the variable `v` by the standard name `standard_name`
or some [standardized heuristics based on units](https://web.archive.org/web/20190918144052/http://cfconventions.org/cf-conventions/cf-conventions.html#latitude-coordinate). If the heuristics fail to detect the coordinate,
consider to modify the netCDF file to add the `standard_name` attribute.
All dimensions of the coordinate must also be dimensions of the variable `v`.
## Example
```julia
using NCDatasets
ds = NCDataset("file.nc")
ncv = ds["SST"]
lon = coord(ncv,"longitude")[:]
lat = coord(ncv,"latitude")[:]
v = ncv[:]
close(ds)
```
"""
function coord(v::AbstractVariable,standard_name)
matches = Dict(
"time" => [r".*since.*"],
# It is great to have choice!
# https://web.archive.org/web/20190918144052/http://cfconventions.org/cf-conventions/cf-conventions.html#latitude-coordinate
"longitude" => [r"degree east",r"degrees east",r"degrees_east",
r"degree_east", r"degree_E", r"degrees_E",
r"degreeE", r"degreesE"],
"latitude" => [r"degree north",r"degrees north",r"degrees_north",
r"degree_north", r"degree_N", r"degrees_N", r"degreeN",
r"degreesN"],
)

ds = dataset(v)
dims = Set(dimnames(v))

# find by standard name
for coord in varbyattrib(ds,standard_name = standard_name)
if Set(dimnames(coord)) dims
return coord
end
end

# find by units
if haskey(matches,standard_name)
# prefer e.g. vectors over scalars
# this is necessary for ROMS model output
coordfound = nothing
coordndims = -1

for (_,coord) in ds
units = get(coord.attrib,"units","")

for re in matches[standard_name]
if match(re,units) != nothing
if Set(dimnames(coord)) dims
if ndims(coord) > coordndims
coordfound = coord
coordndims = ndims(coord)
end
end
end
end
end

return coordfound
end

return nothing
end




"""
b = bounds(ncvar::NCDatasets.CFVariable)
Return the CFVariable corresponding to the `bounds` attribute of the variable `ncvar`.
The time units and calendar from the `ncvar` are used but not the
attributes controling the
packing of data `scale_factor`, `add_offset` and `_FillValue`.
"""
function bounds(ncvar::CFVariable)
ds = dataset(ncvar)
varname = ncvar.attrib["bounds"]
return ds[varname]
end
27 changes: 25 additions & 2 deletions src/cfvariable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -385,8 +385,7 @@ end
# Inverse transformation of a single `Char`: only `data` and `fv` are forwarded
# to `CFtransform_replace_missing`; the remaining arguments are not used.
@inline function CFinvtransformdata(data::Char, fv, scale_factor, add_offset, time_origin, time_factor, DT)
    return CFtransform_replace_missing(data, fv)
end


function Base.getindex(v::CFVariable,
indexes::Union{Int,Colon,AbstractRange{<:Integer},AbstractVector{<:Integer}}...)
function Base.getindex(v::CFVariable, indexes::Union{Integer,Colon,AbstractRange{<:Integer},AbstractVector{<:Integer}}...)
data = v.var[indexes...]
return CFtransformdata(data,fill_and_missing_values(v),scale_factor(v),add_offset(v),
time_origin(v),time_factor(v),eltype(v))
Expand Down Expand Up @@ -459,3 +458,27 @@ function _getattrib(ds,v,parentname,attribname,default)
end
end
end



# Return `true` if all dimension names of `v1` are also dimension names of
# `v2`, i.e. `v1` is defined on a subset of the dimensions of `v2`.
function _isrelated(v1::AbstractVariable, v2::AbstractVariable)
    return dimnames(v1) ⊆ dimnames(v2)
end

# Names of all variables in the parent dataset of `v` that are related to `v`
# according to `_isrelated`.
function Base.keys(v::AbstractVariable)
    return [key for (key, candidate) in dataset(v) if _isrelated(candidate, v)]
end


# Return the variable called `name` from the parent dataset of `v`, provided it
# is related to `v` according to `_isrelated`; otherwise throw a `KeyError`.
function Base.getindex(v::AbstractVariable, name::SymbolOrString)
    candidate = dataset(v)[name]
    _isrelated(candidate, v) || throw(KeyError(name))
    return candidate
end

# Indexing a `CFVariable` with a `CFStdName` is delegated to `getindex_byname`.
function Base.getindex(v::CFVariable, n::CFStdName)
    return getindex_byname(v, n)
end
Loading

0 comments on commit 03c367f

Please sign in to comment.