Skip to content

Commit 03c367f

Browse files
Merge pull request #15 from JuliaGeo/refactor-cf
Refactor CF convention code and other code from NCDatasets in CommonDataSet
2 parents d51ad82 + 5c582d2 commit 03c367f

File tree

11 files changed

+505
-68
lines changed

11 files changed

+505
-68
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ authors = ["Alexander Barth <[email protected]>"]
44
keywords = ["netcdf", "GRIB", "climate and forecast conventions", "oceanography", "meteorology", "climatology", "opendap"]
55
license = "MIT"
66
desc = "CommonDataModel is a module that defines types common to NetCDF and GRIB data"
7-
version = "0.2.5"
7+
version = "0.2.6"
88

99
[deps]
1010
CFTime = "179af706-886a-5703-950a-314cd64e0468"

src/CommonDataModel.jl

Lines changed: 3 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -4,68 +4,17 @@ using CFTime
44
using Dates
55
using Printf
66
using Preferences
7-
import Base: isopen, show, display, close
7+
import Base: isopen, show, display, close, filter
88
using DataStructures
99

10-
"""
11-
12-
`AbstractDataset` is a collection of multidimensional variables (for example a
13-
NetCDF or GRIB file)
14-
15-
A data set `ds` of a type derived from `AbstractDataset` should implemented at minimum:
16-
17-
* `Base.key(ds)`: return a list of variable names as strings
18-
* `variable(ds,varname::String)`: return an array-like data structure (derived from `AbstractVariable`) of the variables corresponding to `varname`. This array-like data structure should follow the CF semantics.
19-
* `dimnames(ds)`: should be an iterable with all dimension names in the data set `ds`
20-
* `dim(ds,name)`: dimension value corresponding to name
21-
22-
Optionally a data set can have attributes and groups:
23-
24-
* `attribnames(ds)`: should be an iterable with all attribute names
25-
* `attrib(ds,name)`: attribute value corresponding to name
26-
* `groupnames(ds)`: should be an iterable with all group names
27-
* `group(ds,name)`: group corresponding to the name
28-
29-
For a writable-dataset, one should also implement:
30-
* `defDim`: define a dimension
31-
* `defAttrib`: define a attribute
32-
* `defVar`: define a variable
33-
* `defGroup`: define a group
34-
"""
35-
abstract type AbstractDataset
36-
end
37-
38-
39-
"""
40-
`AbstractVariable{T,N}` is a subclass of `AbstractArray{T, N}`. A variable `v` of a type derived from `AbstractVariable` should implement:
41-
42-
* `name(v)`: should be the name of variable within the data set
43-
* `dimnames(v)`: should be a iterable data structure with all dimension names of the variable `v`
44-
* `dataset(v)`: the parent dataset containing `v`
45-
* `Base.size(v)`: the size of the variable
46-
* `Base.getindex(v,indices...)`: get the data of `v` at the provided indices
47-
48-
Optionally a variable can have attributes:
49-
50-
* `attribnames(v)`: should be an iterable with all attribute names
51-
* `attrib(v,name)`: attribute value corresponding to name
52-
53-
For a writable-dataset, one should also implement:
54-
* `defAttrib`: define a attribute
55-
* `Base.setindex(v,data,indices...)`: set the data in `v` at the provided indices
56-
57-
"""
58-
abstract type AbstractVariable{T,N} <: AbstractArray{T, N}
59-
end
60-
61-
62-
const SymbolOrString = Union{Symbol, AbstractString}
6310

11+
include("types.jl")
6412
include("dataset.jl")
6513
include("variable.jl")
6614
include("cfvariable.jl")
6715
include("attribute.jl")
6816
include("dimension.jl")
17+
include("cfconventions.jl")
6918

7019
end # module CommonDataModel
7120

src/attribute.jl

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,51 @@ function show_attrib(io,a)
5454
print(io,"\n")
5555
end
5656
catch err
57+
@debug "error while printing" err
5758
print(io,"Dataset attributes (file closed)")
5859
end
5960
end
61+
62+
63+
"""
64+
Base.keys(a::Attributes)
65+
66+
Return a list of the names of all attributes.
67+
"""
68+
Base.keys(a::Attributes) = attribnames(a.ds)
69+
70+
71+
"""
72+
getindex(a::Attributes,name::SymbolOrString)
73+
74+
Return the value of the attribute called `name` from the
75+
attribute list `a`. Generally the attributes are loaded by
76+
indexing, for example:
77+
78+
```julia
79+
using NCDatasets
80+
ds = NCDataset("file.nc")
81+
title = ds.attrib["title"]
82+
```
83+
"""
84+
Base.getindex(a::Attributes,name) = attrib(a.ds,name)
85+
86+
87+
"""
88+
Base.setindex!(a::Attributes,data,name::SymbolOrString)
89+
90+
Set the attribute called `name` to the value `data` in the
91+
attribute list `a`. `data` can be a vector or a scalar. A scalar
92+
is handled as a vector with one element in the NetCDF data model.
93+
94+
Generally the attributes are defined by indexing, for example:
95+
96+
```julia
97+
ds = NCDataset("file.nc","c")
98+
ds.attrib["title"] = "my title"
99+
close(ds)
100+
```
101+
"""
102+
Base.setindex!(a::Attributes,data,name) = defAttrib(a.ds,name,data)
103+
104+
Base.show(io::IO,a::Attributes) = show_attrib(io,a)

src/cfconventions.jl

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
2+
CFStdName(n::AbstractString) = CFStdName(Symbol(n))
3+
4+
macro CF_str(n)
5+
CFStdName(n)
6+
end
7+
8+
import Base.string
9+
Base.string(n::CFStdName) = string(n.name)
10+
11+
12+
"""
13+
ncvar = NCDatasets.ancillaryvariables(ncv::NCDatasets.CFVariable,modifier)
14+
15+
Return the first ancillary variable from the NetCDF variable `ncv` with the
16+
standard name modifier `modifier`. It can be used for example to access
17+
related variables such as status flags.
18+
"""
19+
function ancillaryvariables(ncv::CFVariable,modifier)
20+
ds = dataset(ncv)
21+
varname = name(ncv)
22+
23+
if !haskey(ncv.attrib,"ancillary_variables")
24+
return nothing
25+
end
26+
27+
ancillary_variables = split(ncv.attrib["ancillary_variables"])
28+
29+
for ancillary_variable in ancillary_variables
30+
ncv_ancillary = ds[ancillary_variable]
31+
if occursin(modifier,ncv_ancillary.attrib["standard_name"])
32+
@debug ancillary_variable
33+
return ncv_ancillary
34+
end
35+
end
36+
37+
# nothing found
38+
return nothing
39+
end
40+
41+
42+
allowmissing(x::AbstractArray{T}) where {T} = convert(AbstractArray{Union{T, Missing}}, x)
43+
44+
"""
45+
data = filter(ncv, indices...; accepted_status_flags = nothing)
46+
47+
Load and filter observations by replacing all values without an accepted status
48+
flag with `missing`. The attribute `ancillary_variables` is used to identify
49+
the status flag.
50+
51+
```
52+
# ds["data"] is a 2D matrix
53+
good_data = NCDatasets.filter(ds["data"],:,:, accepted_status_flags = ["good_data","probably_good_data"])
54+
```
55+
56+
"""
57+
function filter(ncv::AbstractVariable, indices...; accepted_status_flags = nothing)
58+
#function filter_(ncv, indices...)
59+
# accepted_status_flags = ("good_value", "probably_good_value")
60+
data = allowmissing(ncv[indices...])
61+
62+
if (accepted_status_flags != nothing)
63+
ncv_ancillary = ancillaryvariables(ncv,"status_flag");
64+
if ncv_ancillary == nothing
65+
error("no variable with the attribute status_flag as standard_name among $(ancillary_variables) found")
66+
end
67+
68+
flag_values = ncv_ancillary.attrib["flag_values"]
69+
flag_meanings = ncv_ancillary.attrib["flag_meanings"]::String
70+
if typeof(flag_meanings) <: AbstractString
71+
flag_meanings = split(flag_meanings)
72+
end
73+
74+
accepted_status_flag_values = zeros(eltype(flag_values),length(accepted_status_flags))
75+
for i = eachindex(accepted_status_flags,accepted_status_flag_values)
76+
tmp = findfirst(accepted_status_flags[i] .== flag_meanings)
77+
78+
if tmp == nothing
79+
error("cannot recognise flag $(accepted_status_flags[i])")
80+
end
81+
accepted_status_flag_values[i] = flag_values[tmp]
82+
end
83+
#@debug accepted_status_flag_values
84+
85+
dataflag = ncv_ancillary.var[indices...];
86+
for i in eachindex(data)
87+
good = false;
88+
for accepted_status_flag_value in accepted_status_flag_values
89+
good = good || (dataflag[i] .== accepted_status_flag_value)
90+
end
91+
if !good
92+
#@show i,dataflag[i]
93+
data[i] = missing
94+
end
95+
end
96+
end
97+
98+
return data
99+
end
100+
101+
102+
"""
103+
cv = coord(v::Union{CFVariable,Variable},standard_name)
104+
105+
Find the coordinate of the variable `v` by the standard name `standard_name`
106+
or some [standardized heuristics based on units](https://web.archive.org/web/20190918144052/http://cfconventions.org/cf-conventions/cf-conventions.html#latitude-coordinate). If the heuristics fail to detect the coordinate,
107+
consider modifying the netCDF file to add the `standard_name` attribute.
108+
All dimensions of the coordinate must also be dimensions of the variable `v`.
109+
110+
## Example
111+
```julia
112+
using NCDatasets
113+
ds = NCDataset("file.nc")
114+
ncv = ds["SST"]
115+
lon = coord(ncv,"longitude")[:]
116+
lat = coord(ncv,"latitude")[:]
117+
v = ncv[:]
118+
close(ds)
119+
```
120+
"""
121+
function coord(v::AbstractVariable,standard_name)
122+
matches = Dict(
123+
"time" => [r".*since.*"],
124+
# It is great to have choice!
125+
# https://web.archive.org/web/20190918144052/http://cfconventions.org/cf-conventions/cf-conventions.html#latitude-coordinate
126+
"longitude" => [r"degree east",r"degrees east",r"degrees_east",
127+
r"degree_east", r"degree_E", r"degrees_E",
128+
r"degreeE", r"degreesE"],
129+
"latitude" => [r"degree north",r"degrees north",r"degrees_north",
130+
r"degree_north", r"degree_N", r"degrees_N", r"degreeN",
131+
r"degreesN"],
132+
)
133+
134+
ds = dataset(v)
135+
dims = Set(dimnames(v))
136+
137+
# find by standard name
138+
for coord in varbyattrib(ds,standard_name = standard_name)
139+
if Set(dimnames(coord)) ⊆ dims
140+
return coord
141+
end
142+
end
143+
144+
# find by units
145+
if haskey(matches,standard_name)
146+
# prefer e.g. vectors over scalars
147+
# this is necessary for ROMS model output
148+
coordfound = nothing
149+
coordndims = -1
150+
151+
for (_,coord) in ds
152+
units = get(coord.attrib,"units","")
153+
154+
for re in matches[standard_name]
155+
if match(re,units) != nothing
156+
if Set(dimnames(coord)) ⊆ dims
157+
if ndims(coord) > coordndims
158+
coordfound = coord
159+
coordndims = ndims(coord)
160+
end
161+
end
162+
end
163+
end
164+
end
165+
166+
return coordfound
167+
end
168+
169+
return nothing
170+
end
171+
172+
173+
174+
175+
"""
176+
b = bounds(ncvar::NCDatasets.CFVariable)
177+
178+
Return the CFVariable corresponding to the `bounds` attribute of the variable `ncvar`.
179+
The time units and calendar from the `ncvar` are used but not the
180+
attributes controlling the
181+
packing of data `scale_factor`, `add_offset` and `_FillValue`.
182+
"""
183+
function bounds(ncvar::CFVariable)
184+
ds = dataset(ncvar)
185+
varname = ncvar.attrib["bounds"]
186+
return ds[varname]
187+
end

src/cfvariable.jl

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -385,8 +385,7 @@ end
385385
@inline CFinvtransformdata(data::Char,fv,scale_factor,add_offset,time_origin,time_factor,DT) = CFtransform_replace_missing(data,fv)
386386

387387

388-
function Base.getindex(v::CFVariable,
389-
indexes::Union{Int,Colon,AbstractRange{<:Integer},AbstractVector{<:Integer}}...)
388+
function Base.getindex(v::CFVariable, indexes::Union{Integer,Colon,AbstractRange{<:Integer},AbstractVector{<:Integer}}...)
390389
data = v.var[indexes...]
391390
return CFtransformdata(data,fill_and_missing_values(v),scale_factor(v),add_offset(v),
392391
time_origin(v),time_factor(v),eltype(v))
@@ -459,3 +458,27 @@ function _getattrib(ds,v,parentname,attribname,default)
459458
end
460459
end
461460
end
461+
462+
463+
464+
function _isrelated(v1::AbstractVariable,v2::AbstractVariable)
465+
dimnames(v1) ⊆ dimnames(v2)
466+
end
467+
468+
function Base.keys(v::AbstractVariable)
469+
ds = dataset(v)
470+
return [varname for (varname,ncvar) in ds if _isrelated(ncvar,v)]
471+
end
472+
473+
474+
function Base.getindex(v::AbstractVariable,name::SymbolOrString)
475+
ds = dataset(v)
476+
ncvar = ds[name]
477+
if _isrelated(ncvar,v)
478+
return ncvar
479+
else
480+
throw(KeyError(name))
481+
end
482+
end
483+
484+
Base.getindex(v::CFVariable,n::CFStdName) = getindex_byname(v,n)

0 commit comments

Comments
 (0)