JuliaGeo
diff --git a/‎Project.toml
Lines changed: 1 addition & 1 deletion b/‎Project.toml
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/CommonDataModel.jl
Lines changed: 3 additions & 54 deletions b/‎src/CommonDataModel.jl
Lines changed: 3 additions & 54 deletions
diff --git a/‎src/attribute.jl
Lines changed: 45 additions & 0 deletions b/‎src/attribute.jl
Lines changed: 45 additions & 0 deletions
diff --git a/‎src/cfconventions.jl
Lines changed: 187 additions & 0 deletions b/‎src/cfconventions.jl
Lines changed: 187 additions & 0 deletions
diff --git a/‎src/cfvariable.jl
Lines changed: 25 additions & 2 deletions b/‎src/cfvariable.jl
Lines changed: 25 additions & 2 deletions
@@ -4,7 +4,7 @@ authors = ["Alexander Barth <[email protected]>"]
 keywords = ["netcdf", "GRIB", "climate and forecast conventions", "oceanography", "meteorology", "climatology", "opendap"]
 license = "MIT"
 desc = "CommonDataModel is a module that defines types common to NetCDF and GRIB data"
-version = "0.2.5"
+version = "0.2.6"
 
 [deps]
 CFTime = "179af706-886a-5703-950a-314cd64e0468"
 
@@ -4,68 +4,17 @@ using CFTime
 using Dates
 using Printf
 using Preferences
-import Base: isopen, show, display, close
+import Base: isopen, show, display, close, filter
 using DataStructures
 
-"""
-
-`AbstractDataset` is a collection of multidimensional variables (for example a
-NetCDF or GRIB file)
-
-A data set `ds` of a type derived from `AbstractDataset` should implemented at minimum:
-
-* `Base.key(ds)`: return a list of variable names as strings
-* `variable(ds,varname::String)`: return an array-like data structure (derived from `AbstractVariable`) of the variables corresponding to `varname`. This array-like data structure should follow the CF semantics.
-* `dimnames(ds)`: should be an iterable with all dimension names in the data set  `ds`
-* `dim(ds,name)`: dimension value corresponding to name
-
-Optionally a data set can have attributes and groups:
-
-* `attribnames(ds)`: should be an iterable with all attribute names
-* `attrib(ds,name)`: attribute value corresponding to name
-* `groupnames(ds)`: should be an iterable with all group names
-* `group(ds,name)`: group corresponding to the name
-
-For a writable-dataset, one should also implement:
-* `defDim`: define a dimension
-* `defAttrib`: define a attribute
-* `defVar`: define a variable
-* `defGroup`: define a group
-"""
-abstract type AbstractDataset
-end
-
-
-"""
-`AbstractVariable{T,N}` is a subclass of `AbstractArray{T, N}`. A variable `v` of a type derived from `AbstractVariable` should implement:
-
-* `name(v)`: should be the name of variable within the data set
-* `dimnames(v)`: should be a iterable data structure with all dimension names of the variable `v`
-* `dataset(v)`: the parent dataset containing `v`
-* `Base.size(v)`: the size of the variable
-* `Base.getindex(v,indices...)`: get the data of `v` at the provided indices
-
-Optionally a variable can have attributes:
-
-* `attribnames(v)`: should be an iterable with all attribute names
-* `attrib(v,name)`: attribute value corresponding to name
-
-For a writable-dataset, one should also implement:
-* `defAttrib`: define a attribute
-* `Base.setindex(v,data,indices...)`: set the data in `v` at the provided indices
-
-"""
-abstract type AbstractVariable{T,N} <: AbstractArray{T, N}
-end
-
-
-const SymbolOrString = Union{Symbol, AbstractString}
 
+include("types.jl")
 include("dataset.jl")
 include("variable.jl")
 include("cfvariable.jl")
 include("attribute.jl")
 include("dimension.jl")
+include("cfconventions.jl")
 
 end # module CommonDataModel
 
 
@@ -54,6 +54,51 @@ function show_attrib(io,a)
             print(io,"\n")
         end
     catch err
+        @debug "error while printing" err
         print(io,"Dataset attributes (file closed)")
     end
 end
+
+
+"""
+    Base.keys(attrs::Attributes)
+
+Return the names of all attributes of `attrs`, as given by `attribnames`.
+"""
+Base.keys(attrs::Attributes) = attribnames(attrs.ds)
+
+
+"""
+    getindex(attrs::Attributes,name::SymbolOrString)
+
+Return the value of the attribute `name` stored in the attribute
+list `attrs`. Attributes are usually read with the indexing syntax,
+for example:
+
+```julia
+using NCDatasets
+ds = NCDataset("file.nc")
+title = ds.attrib["title"]
+```
+"""
+Base.getindex(attrs::Attributes,name) = attrib(attrs.ds,name)
+
+
+"""
+    Base.setindex!(attrs::Attributes,data,name::SymbolOrString)
+
+Set the attribute `name` of the attribute list `attrs` to the value `data`.
+`data` can be a vector or a scalar. A scalar is handled as a vector with
+one element in the NetCDF data model.
+
+Attributes are usually defined with the indexing syntax, for example:
+
+```julia
+ds = NCDataset("file.nc","c")
+ds.attrib["title"] = "my title"
+close(ds)
+```
+"""
+Base.setindex!(attrs::Attributes,data,name) = defAttrib(attrs.ds,name,data)
+
+Base.show(io::IO,attrs::Attributes) = show_attrib(io,attrs) # printing is done by show_attrib
@@ -0,0 +1,187 @@
+# `CFStdName` string helpers: constructor from a string, `CF"..."` macro and `string`
+CFStdName(stdname::AbstractString) = CFStdName(Symbol(stdname))
+
+macro CF_str(stdname)
+    return CFStdName(stdname)
+end
+
+import Base.string
+Base.string(stdname::CFStdName) = string(stdname.name)
+
+
+"""
+    ncvar = ancillaryvariables(ncv::CFVariable,modifier)
+
+Return the first variable listed in the `ancillary_variables` attribute of
+`ncv` whose `standard_name` attribute contains `modifier` (it can be used for
+example to access related variables like status flags). Return `nothing` if
+`ncv` has no `ancillary_variables` attribute or if no listed variable matches.
+"""
+function ancillaryvariables(ncv::CFVariable,modifier)
+    if !haskey(ncv.attrib,"ancillary_variables")
+        return nothing
+    end
+
+    ds = dataset(ncv)
+    # the attribute holds a blank-separated list of variable names
+    for ancillary_variable in split(ncv.attrib["ancillary_variables"])
+        ncv_ancillary = ds[ancillary_variable]
+        # an ancillary variable without standard_name cannot match: skip it
+        haskey(ncv_ancillary.attrib,"standard_name") || continue
+        if occursin(modifier,ncv_ancillary.attrib["standard_name"])
+            @debug ancillary_variable
+            return ncv_ancillary
+        end
+    end
+
+    # nothing found
+    return nothing
+end
+
+# widen the element type of `data` to `Union{T, Missing}` so elements can be set to `missing`
+allowmissing(data::AbstractArray{T}) where {T} = convert(AbstractArray{Union{T, Missing}}, data)
+
+"""
+    data = filter(ncv, indices...; accepted_status_flags = nothing)
+
+Load the data of `ncv` at `indices` and replace every element whose status
+flag is not one of `accepted_status_flags` by `missing`. The status flag
+variable is identified through the attribute `ancillary_variables` of `ncv`.
+An error is raised if no status flag variable is found or if an accepted
+flag is not listed in its `flag_meanings` attribute. If
+`accepted_status_flags` is `nothing` (default), no filtering is applied.
+
+```
+# ds["data"] is 2D matrix
+good_data = filter(ds["data"],:,:, accepted_status_flags = ["good_data","probably_good_data"])
+```
+"""
+function filter(ncv::AbstractVariable, indices...; accepted_status_flags = nothing)
+    data = allowmissing(ncv[indices...])
+
+    if accepted_status_flags === nothing
+        return data
+    end
+
+    ncv_ancillary = ancillaryvariables(ncv,"status_flag")
+    if ncv_ancillary === nothing
+        # list the declared ancillary variables (if any) in the error message
+        ancillary_variables = get(ncv.attrib,"ancillary_variables","")
+        error("no variable with the attribute status_flag as standard_name among $(ancillary_variables) found")
+    end
+
+    flag_values = ncv_ancillary.attrib["flag_values"]
+    flag_meanings = ncv_ancillary.attrib["flag_meanings"]
+    # a string attribute holds the blank-separated list of flag meanings
+    if flag_meanings isa AbstractString
+        flag_meanings = split(flag_meanings)
+    end
+
+    # translate the accepted flag meanings into the corresponding flag values
+    accepted_status_flag_values = eltype(flag_values)[]
+    for flag in accepted_status_flags
+        pos = findfirst(isequal(flag),flag_meanings)
+        if pos === nothing
+            error("cannot recognise flag $(flag)")
+        end
+        push!(accepted_status_flag_values,flag_values[pos])
+    end
+
+    # flags are read from the underlying variable (`.var`) at the same indices
+    dataflag = ncv_ancillary.var[indices...]
+    for i in eachindex(data)
+        if !(dataflag[i] in accepted_status_flag_values)
+            data[i] = missing
+        end
+    end
+
+    return data
+end
+
+
+"""
+    cv = coord(v::AbstractVariable,standard_name)
+
+Find the coordinate of the variable `v` by the standard name `standard_name`
+or some [standardized heuristics based on units](https://web.archive.org/web/20190918144052/http://cfconventions.org/cf-conventions/cf-conventions.html#latitude-coordinate). If the heuristics fail to detect the coordinate,
+consider modifying the netCDF file to add the `standard_name` attribute.
+All dimensions of the coordinate must also be dimensions of the variable `v`.
+Return `nothing` if no coordinate is found.
+
+## Example
+```julia
+using NCDatasets
+ds = NCDataset("file.nc")
+ncv = ds["SST"]
+lon = coord(ncv,"longitude")[:]
+lat = coord(ncv,"latitude")[:]
+v = ncv[:]
+close(ds)
+```
+"""
+function coord(v::AbstractVariable,standard_name)
+    matches = Dict(
+        "time" => [r".*since.*"],
+        # It is great to have choice!
+        # https://web.archive.org/web/20190918144052/http://cfconventions.org/cf-conventions/cf-conventions.html#latitude-coordinate
+        "longitude" => [r"degree east",r"degrees east",r"degrees_east",
+                        r"degree_east", r"degree_E", r"degrees_E",
+                        r"degreeE", r"degreesE"],
+        "latitude" => [r"degree north",r"degrees north",r"degrees_north",
+                       r"degree_north", r"degree_N", r"degrees_N", r"degreeN",
+                       r"degreesN"],
+    )
+
+    ds = dataset(v)
+    dims = Set(dimnames(v))
+
+    # find by standard name
+    for candidate in varbyattrib(ds,standard_name = standard_name)
+        if Set(dimnames(candidate)) ⊆ dims
+            return candidate
+        end
+    end
+
+    # find by units (only for the standard names with known unit patterns)
+    if !haskey(matches,standard_name)
+        return nothing
+    end
+
+    # prefer e.g. vectors over scalars
+    # this is necessary for ROMS model output
+    coordfound = nothing
+    coordndims = -1
+
+    for (_,candidate) in ds
+        units = get(candidate.attrib,"units","")
+        # a non-string units attribute cannot match any pattern
+        units isa AbstractString || continue
+
+        # keep the matching candidate with the largest number of dimensions
+        if any(re -> occursin(re,units), matches[standard_name]) &&
+            Set(dimnames(candidate)) ⊆ dims &&
+            ndims(candidate) > coordndims
+            coordfound = candidate
+            coordndims = ndims(candidate)
+        end
+    end
+
+    return coordfound
+end
+
+
+
+
+"""
+    b = bounds(ncvar::CFVariable)
+
+Return the variable whose name is given by the `bounds` attribute of `ncvar`,
+loaded from the dataset of `ncvar`. The time units and calendar from `ncvar`
+are used but not the attributes controlling the packing of data
+(`scale_factor`, `add_offset` and `_FillValue`).
+"""
+function bounds(ncvar::CFVariable)
+    parentds = dataset(ncvar)
+    boundsname = ncvar.attrib["bounds"]
+    return parentds[boundsname]
+end
@@ -385,8 +385,7 @@ end
 @inline CFinvtransformdata(data::Char,fv,scale_factor,add_offset,time_origin,time_factor,DT) = CFtransform_replace_missing(data,fv)
 
 
-function Base.getindex(v::CFVariable,
-                       indexes::Union{Int,Colon,AbstractRange{<:Integer},AbstractVector{<:Integer}}...)
+function Base.getindex(v::CFVariable, indexes::Union{Integer,Colon,AbstractRange{<:Integer},AbstractVector{<:Integer}}...)
     data = v.var[indexes...]
     return CFtransformdata(data,fill_and_missing_values(v),scale_factor(v),add_offset(v),
                            time_origin(v),time_factor(v),eltype(v))
@@ -459,3 +458,27 @@ function _getattrib(ds,v,parentname,attribname,default)
         end
     end
 end
+
+
+# `inner` is related to `outer` if every dimension of `inner` is a dimension of `outer`
+function _isrelated(inner::AbstractVariable,outer::AbstractVariable)
+    return issubset(dimnames(inner),dimnames(outer))
+end
+
+# names of all variables in the dataset of `v` that are related to `v` (see `_isrelated`)
+function Base.keys(v::AbstractVariable)
+    return [vname for (vname,other) in dataset(v) if _isrelated(other,v)]
+end
+
+
+# Look up the variable `name` in the dataset of `v`. Only a variable related
+# to `v` (see `_isrelated`) is returned; otherwise a `KeyError` is thrown.
+function Base.getindex(v::AbstractVariable,name::SymbolOrString)
+    candidate = dataset(v)[name]
+
+    # guard: reject variables whose dimensions are not all dimensions of `v`
+    _isrelated(candidate,v) || throw(KeyError(name))
+    return candidate
+end
+
+Base.getindex(v::CFVariable,stdname::CFStdName) = getindex_byname(v,stdname) # delegates to getindex_byname