Skip to content

coefnames returns symbols #169

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions src/temporal_terms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ function Base.show(io::IO, ll::LeadLagTerm{<:Any, F}) where F
opname = string(nameof(F.instance))
print(io, "$opname($(ll.term), $(ll.nsteps))")
end
function StatsBase.coefnames(ll::LeadLagTerm{<:Any, F}) where F
    # Suffix is "_<operator name><nsteps>", where the operator name is taken
    # from the function instance carried in the type parameter `F`.
    suffix = string("_", nameof(F.instance), ll.nsteps)
    # Broadcast so the suffix is appended to every name of the wrapped term.
    return coefnames(ll.term) .* suffix
end
function StatsBase.coefnames(ll::LeadLagTerm{<:Any, F}) where F
    # Delegate to `_llcoef`, which dispatches on the shape of the wrapped
    # term's names; the operator name comes from the function instance of `F`.
    return _llcoef(ll, coefnames(ll.term), string(nameof(F.instance)))
end
# Single name: append "_<opname><nsteps>" and return one `Symbol`.
function _llcoef(ll::LeadLagTerm, t::Symbol, opname)
    return Symbol(t, "_", opname, ll.nsteps)
end
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can't you use the original broadcast-based solution?

# Collection of names: append "_<opname><nsteps>" to each element and collect
# the results into a vector of `Symbol`s.
function _llcoef(ll::LeadLagTerm, ts, opname)
    suffix = string("_", opname, ll.nsteps)
    return [Symbol(t, suffix) for t in ts]
end
46 changes: 38 additions & 8 deletions src/terms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -543,18 +543,48 @@ vectorize(x) = [x]
coefnames(term::AbstractTerm)

Return the name(s) of column(s) generated by a term. Return value is either a
`String` or an iterable of `String`s.
`Symbol` or an iterable of `Symbol`s.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

missed a String

"""
function StatsBase.coefnames(t::Term)
    # An unresolved `Term` is named by its `sym` field.
    return t.sym
end

# A formula yields a 2-tuple: names of the left-hand side, then the right-hand side.
function StatsBase.coefnames(t::FormulaTerm)
    lhs_names = coefnames(t.lhs)
    rhs_names = coefnames(t.rhs)
    return (lhs_names, rhs_names)
end

# `H` says whether the intercept is present; when absent an empty vector is returned.
function StatsBase.coefnames(::InterceptTerm{H}) where {H}
    if H
        return "(Intercept)"
    else
        return []
    end
end

# String form of the term's `sym` field.
function StatsBase.coefnames(t::ContinuousTerm)
    return string(t.sym)
end

# One "<sym>: <name>" entry per name listed in the term's contrasts.
function StatsBase.coefnames(t::CategoricalTerm)
    return [string(t.sym, ": ", name) for name in t.contrasts.termnames]
end

# String form of the stored original expression.
function StatsBase.coefnames(t::FunctionTerm)
    return string(t.exorig)
end

# Names of every element of the tuple, concatenated with `vcat`.
function StatsBase.coefnames(ts::TupleTerm)
    per_term = coefnames.(ts)
    return reduce(vcat, per_term)
end
# Name of the intercept column. `H` says whether the intercept is present:
# when it is, the name is `:Intercept`; when it is not, an empty vector is
# returned. The empty vector is typed (`Symbol[]`) so that concatenating it
# with other names via `vcat` does not widen the result to `Vector{Any}`.
# NOTE(review): the original author flagged the empty-vector return as possibly
# the wrong thing to return — still open.
StatsBase.coefnames(::InterceptTerm{H}) where {H} = H ? :Intercept : Symbol[]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we keep the parens? Then you'd have Symbol("(Intercept)").

# The term's `sym` field is returned unchanged.
function StatsBase.coefnames(t::ContinuousTerm)
    return t.sym
end

# One `Symbol` of the form "<sym>: <name>" per name listed in the term's contrasts.
function StatsBase.coefnames(t::CategoricalTerm)
    prefix = t.sym
    return [Symbol(prefix, ": ", level) for level in t.contrasts.termnames]
end

# `Symbol` built from the printed form of the stored original expression.
function StatsBase.coefnames(t::FunctionTerm)
    return Symbol(t.exorig)
end

# Names of all wrapped terms, concatenated in order with `vcat`.
function StatsBase.coefnames(t::MatrixTerm)
    return mapreduce(coefnames, vcat, t.terms)
end
#function StatsBase.coefnames(t::InteractionTerm)
# Symbol.(kron_insideout((args...) -> join(args, " & "), vectorize.(coefnames.(t.terms))...))
#end
# Names for an interaction term. The names of each component term are wrapped
# with `vectorize`, combined by `kron_insideout` with every combination joined
# by " & ", and the joined strings are converted to `Symbol`s.
function StatsBase.coefnames(t::InteractionTerm)
    parts = vectorize.(coefnames.(t.terms))
    joined = kron_insideout((args...) -> join(args, " & "), parts...)
    return Symbol.(joined)
end
# Names for a tuple of terms: a tuple holding each element's `coefnames`, in order.
# The tuple is passed to `_coefnames` directly rather than through a `terms` field.
# NOTE(review): this relies on `TupleTerm` being a plain tuple of terms (as in
# StatsModels' `NTuple{N, AbstractTerm}` alias) — confirm.
StatsBase.coefnames(ts::TupleTerm) = _coefnames(ts)
# Recursive step: names of the first element, followed by the names of the rest.
# `Base.tail` is qualified because `tail` is not exported from Base.
_coefnames(ts::Tuple) = (coefnames(first(ts)), _coefnames(Base.tail(ts))...)
# Base case: nothing left to name.
_coefnames(ts::Tuple{}) = ()

"""
    coef(f::FormulaTerm, s::Symbol)

Return the term of formula `f` whose coefficient name is exactly `s`.

The left-hand side is checked first; otherwise the right-hand side is searched
with `_coef`. Names are compared with `===`, so only terms whose `coefnames`
is a single `Symbol` can match. Throws an error when no term matches.
"""
function StatsBase.coef(f::FormulaTerm, s::Symbol)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see what the point of this is, or rather why it's called coef (which means something different in statsbase). can you explain what your goal is here? in any case, this needs tests if it's going to be included, and it seems like it would be better in a separate PR.

also, it doesn't seem like this works for categorical terms or for interactions, even if you specify the full coefficient name, since coefnames will be a vector of symbols.

and the RHS can have other things than just a MatrixTerm (e.g., MixedModels has a tuple of fixed and random effect matrices which are of different types)

    # `_coef` returns `false` (not a term) when nothing on the right-hand side matches.
    c = coefnames(f.lhs) === s ? f.lhs : _coef(f.rhs, s)
    # Report the requested name and the formula that was searched.
    c isa AbstractTerm || error("$s is not a coefficient within $f")
    return c
end
# Return `t` itself when its coefficient name is exactly the symbol `s`,
# otherwise `false`. A term whose `coefnames` is a collection never matches,
# because the comparison is `===` against a single `Symbol`.
_coef(t::AbstractTerm, s::Symbol) = coefnames(t) === s ? t : false

# Search the terms wrapped by a `MatrixTerm` (its `terms` field) and return the
# first one whose coefficient name is exactly `s`; `false` when none matches.
function _coef(t::MatrixTerm, s::Symbol)
    for t_i in t.terms
        coefnames(t_i) === s && return t_i
    end
    return false
end



################################################################################
# old Terms features:
Expand Down
18 changes: 9 additions & 9 deletions test/contrasts.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
1 0 0
1 0 0
1 1 0]
@test coefnames(mf) == ["(Intercept)"; "x: b"; "x: c"]
@test coefnames(mf) == [:Intercept; Symbol("x: b"); Symbol("x: c")]

mmm = ModelMatrix(mf).m
setcontrasts!(mf, x = DummyCoding())
Expand All @@ -49,7 +49,7 @@
1 -1 -1
1 -1 -1
1 1 0]
@test coefnames(mf) == ["(Intercept)"; "x: b"; "x: c"]
@test coefnames(mf) == [:Intercept; Symbol("x: b"); Symbol("x: c")]

# change base level of contrast
setcontrasts!(mf, x = EffectsCoding(base = :b))
Expand All @@ -59,7 +59,7 @@
1 1 0
1 1 0
1 -1 -1]
@test coefnames(mf) == ["(Intercept)"; "x: a"; "x: c"]
@test coefnames(mf) == [:Intercept; Symbol("x: a"); Symbol("x: c")]

# change levels of contrast
setcontrasts!(mf, x = EffectsCoding(levels = [:c, :b, :a]))
Expand All @@ -69,7 +69,7 @@
1 0 1
1 0 1
1 1 0]
@test coefnames(mf) == ["(Intercept)"; "x: b"; "x: a"]
@test coefnames(mf) == [:Intercept; Symbol("x: b"); Symbol("x: a")]


# change levels and base level of contrast
Expand All @@ -80,10 +80,10 @@
1 -1 -1
1 -1 -1
1 0 1]
@test coefnames(mf) == ["(Intercept)"; "x: c"; "x: b"]
@test coefnames(mf) == [:Intercept; Symbol("x: c"); Symbol("x: b")]

# respect order of levels

data = DataFrame(x = levels!(categorical(['A', 'B', 'C', 'C', 'D']), ['C', 'B', 'A', 'D']))
f = apply_schema(@formula(x ~ 1), schema(data))
@test modelcols(f.lhs, data) == [0 1 0; 1 0 0; 0 0 0; 0 0 0; 0 0 1]
Expand All @@ -96,7 +96,7 @@
1 -1 -1
1 -1 -1
1 1 -1]
@test coefnames(mf) == ["(Intercept)"; "x: b"; "x: c"]
@test coefnames(mf) == [:Intercept; Symbol("x: b"); Symbol("x: c")]

# Mismatching types of data and contrasts levels throws an error:
@test_throws ArgumentError setcontrasts!(mf, x = EffectsCoding(levels = ["a", "b", "c"]))
Expand All @@ -111,7 +111,7 @@
1 -1
1 -1
1 1]
@test coefnames(mf_missing) == ["(Intercept)"; "x: b"]
@test coefnames(mf_missing) == [:Intercept; Symbol("x: b")]

# Things that are bad to do:
# Applying contrasts that only have a subset of data levels:
Expand Down
54 changes: 27 additions & 27 deletions test/modelmatrix.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
x4 = [17.:20;]
f = @formula(y ~ 1 + x1 + x2)
mf = ModelFrame(f, d)
@test coefnames(mf) == ["(Intercept)","x1","x2"]
@test coefnames(mf) == [:Intercept, :x1, :x2]
@test response(mf) == [1:4;]
mm = ModelMatrix(mf)
smm = ModelMatrix{sparsetype}(mf)
Expand All @@ -45,7 +45,7 @@
@test mm.m[:,2] == [0, 1., 0, 0]
@test mm.m[:,3] == [0, 0, 1., 0]
@test mm.m[:,4] == [0, 0, 0, 1.]
@test coefnames(mf)[2:end] == ["x1p: 6", "x1p: 7", "x1p: 8"]
@test coefnames(mf)[2:end] == [Symbol("x1p: 6"), Symbol("x1p: 7"), Symbol("x1p: 8")]
@test mm.m == ModelMatrix{sparsetype}(mf).m

#test_group("Creating a model matrix using full formulas: y => x1 + x2, etc")
Expand Down Expand Up @@ -176,15 +176,15 @@
1 0
0 1]
@test mm.m == ModelMatrix{sparsetype}(mf).m
@test coefnames(mf) == ["x: a", "x: b"]
@test coefnames(mf) == [Symbol("x: a"), Symbol("x: b")]

## promotion blocked when we block default model=StatisticalModel
mf = ModelFrame(@formula(n ~ 0 + x), d, model=Nothing, contrasts=cs)
mm = ModelMatrix(mf)
@test all(mm.m .== ifelse.(d.x .== :a, -1, 1))
@test coefnames(mf) == ["x: b"]
@test coefnames(mf) == [Symbol("x: b")]


## No first-order term for interaction
mf = ModelFrame(@formula(n ~ 1 + x + x&y), d, contrasts=cs)
mm = ModelMatrix(mf)
Expand All @@ -197,8 +197,8 @@
-1 1 0
1 0 1]
@test mm.m == ModelMatrix{sparsetype}(mf).m
@test coefnames(mf) == ["(Intercept)", "x: b", "x: a & y: d", "x: b & y: d"]
@test coefnames(mf) == [:Intercept, Symbol("x: b"), Symbol("x: a & y: d"), Symbol("x: b & y: d")]

## When both terms of interaction are non-redundant:
mf = ModelFrame(@formula(n ~ 0 + x&y), d, contrasts=cs)
mm = ModelMatrix(mf)
Expand All @@ -211,8 +211,8 @@
0 0 1 0
0 0 0 1]
@test mm.m == ModelMatrix{sparsetype}(mf).m
@test coefnames(mf) == ["x: a & y: c", "x: b & y: c",
"x: a & y: d", "x: b & y: d"]
@test coefnames(mf) == [Symbol("x: a & y: c"), Symbol("x: b & y: c"),
Symbol("x: a & y: d"), Symbol("x: b & y: d")]

# only a three-way interaction: every term is promoted.
mf = ModelFrame(@formula(n ~ 0 + x&y&z), d, contrasts=cs)
Expand All @@ -235,9 +235,9 @@
0 0 1 0 1 0
0 0 0 1 0 1]
@test mm.m == ModelMatrix{sparsetype}(mf).m
@test coefnames(mf) == ["x: a & y: c", "x: b & y: c",
"x: a & y: d", "x: b & y: d",
"x: a & z: f", "x: b & z: f"]
@test coefnames(mf) == [Symbol("x: a & y: c"), Symbol("x: b & y: c"),
Symbol("x: a & y: d"), Symbol("x: b & y: d"),
Symbol("x: a & z: f"), Symbol("x: b & z: f")]

# ...and adding a three-way interaction, only the shared term (x) is promoted.
# this is because dropping x gives y&z which isn't present, but dropping y or z
Expand All @@ -253,10 +253,10 @@
0 0 1 0 1 0 1 0
0 0 0 1 0 1 0 1]
@test mm.m == ModelMatrix{sparsetype}(mf).m
@test coefnames(mf) == ["x: a & y: c", "x: b & y: c",
"x: a & y: d", "x: b & y: d",
"x: a & z: f", "x: b & z: f",
"x: a & y: d & z: f", "x: b & y: d & z: f"]
@test coefnames(mf) == [Symbol("x: a & y: c"), Symbol("x: b & y: c"),
Symbol("x: a & y: d"), Symbol("x: b & y: d"),
Symbol("x: a & z: f"), Symbol("x: b & z: f"),
Symbol("x: a & y: d & z: f"), Symbol("x: b & y: d & z: f")]

# two two-way interactions, with common lower-order term. the common term x is
# promoted in both (along with lower-order term), because in every case, when
Expand All @@ -272,11 +272,11 @@
1 0 1 0 1 0
0 1 0 1 0 1]
@test mm.m == ModelMatrix{sparsetype}(mf).m
@test coefnames(mf) == ["x: a", "x: b",
"x: a & y: d", "x: b & y: d",
"x: a & z: f", "x: b & z: f"]
@test coefnames(mf) == [Symbol("x: a"), Symbol("x: b"),
Symbol("x: a & y: d"), Symbol("x: b & y: d"),
Symbol("x: a & z: f"), Symbol("x: b & z: f")]


## FAILS: When both terms are non-redundant and intercept is PRESENT
## (not fully redundant). Ideally, would drop last column. Might make sense
## to warn about this, and suggest recoding x and y into a single variable.
Expand All @@ -285,8 +285,8 @@
1 0 1 0
1 0 0 1
1 0 0 0]
@test_broken coefnames(mf) == ["x: a & y: c", "x: b & y: c",
"x: a & y: d", "x: b & y: d"]
@test_broken coefnames(mf) == [Symbol("x: a & y: c"), Symbol("x: b & y: c"),
Symbol("x: a & y: d"), Symbol("x: b & y: d")]

## note that R also does not detect this automatically. it's left to glm et al.
## to detect numerically when the model matrix is rank deficient, which is hard
Expand All @@ -304,18 +304,18 @@
@testset "arbitrary functions in formulae" begin
d = deepcopy(d_orig)
mf = ModelFrame(@formula(y ~ log(x1)), d, model=Nothing)
@test coefnames(mf) == ["log(x1)"]
@test coefnames(mf) == [Symbol("log(x1)")]
mm = ModelMatrix(mf)
@test all(mm.m .== log.(x1))

# | is not special in base formula:
d = DataFrame(x = [1,2,3], y = [4,5,6])
mf = ModelFrame(@formula(y ~ 1 + (1 | x)), d)
@test coefnames(mf) == ["(Intercept)", "1 | x"]
@test coefnames(mf) == [:Intercept, Symbol("1 | x")]

mf = ModelFrame(@formula(y ~ 0 + (1 | x)), d)
@test all(ModelMatrix(mf).m .== float.(1 .| d.x))
@test coefnames(mf) == ["1 | x"]
@test coefnames(mf) == [Symbol("1 | x")]
end


Expand Down
14 changes: 7 additions & 7 deletions test/statsmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ StatsBase.response(mod::DummyMod) = mod.y
## dumb coeftable: just prints the "beta" values
# Coefficient table for `DummyMod`: `mod.beta` reshaped into a single-column
# matrix, with label vectors sized by the number of columns of `mod.x`.
# NOTE(review): both the String-valued and the Symbol-valued label vectors
# appear as arguments in this call as written; presumably only one pair is
# intended — confirm against the `CoefTable` constructor.
StatsBase.coeftable(mod::DummyMod) =
CoefTable(reshape(mod.beta, (size(mod.beta,1), 1)),
["'beta' value"],
["" for n in 1:size(mod.x,2)],
[Symbol("'beta' value")],
[Symbol("") for n in 1:size(mod.x,2)],
0)
# dumb predict: return values predicted by "beta" and dummy confidence bounds
function StatsBase.predict(mod::DummyMod;
Expand Down Expand Up @@ -108,7 +108,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg)
@test response(m) == Array(d.y)

## coefnames delegated to model frame by default
@test coefnames(m) == coefnames(ModelFrame(f, d)) == ["(Intercept)", "x1", "x2", "x1 & x2"]
@test coefnames(m) == coefnames(ModelFrame(f, d)) == [:Intercept, :x1, :x2, Symbol("x1 & x2")]

## test prediction method
## vanilla
Expand Down Expand Up @@ -138,7 +138,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg)

## test copying of names from Terms to CoefTable
ct = coeftable(m)
@test ct.rownms == ["(Intercept)", "x1", "x2", "x1 & x2"]
@test ct.rownms == [:Intercept, :x1, :x2, Symbol("x1 & x2")]

## show with coeftable defined
io = IOBuffer()
Expand All @@ -148,7 +148,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg)
f2 = @formula(y ~ x1p)
m2 = fit(DummyMod, f2, d)

@test coeftable(m2).rownms == ["(Intercept)", "x1p: 6", "x1p: 7", "x1p: 8"]
@test coeftable(m2).rownms == [:Intercept, Symbol("x1p: 6"), Symbol("x1p: 7"), Symbol("x1p: 8")]

## predict w/ new data missing levels
@test predict(m2, d[2:4, :]) == predict(m2)[2:4]
Expand Down Expand Up @@ -190,7 +190,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg)
m3 = fit(DummyModNoIntercept, f3, d)
ct2 = coeftable(m2)
ct3 = coeftable(m3)
@test ct3.rownms == ct2.rownms == ["x1", "x2", "x1 & x2"]
@test ct3.rownms == ct2.rownms == [:x1, :x2, Symbol("x1 & x2")]
@test predict(m2, d[2:4, :]) == predict(m2)[2:4]
@test predict(m3, d[2:4, :]) == predict(m3)[2:4]

Expand All @@ -202,7 +202,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg)
m3 = fit(DummyModNoIntercept, f3, d)
ct2 = coeftable(m2)
ct3 = coeftable(m3)
@test ct2.rownms == ct3.rownms == ["x1p: 6", "x1p: 7", "x1p: 8"]
@test ct2.rownms == ct3.rownms == [Symbol("x1p: 6"), Symbol("x1p: 7"), Symbol("x1p: 8")]
m4 = fit(DummyModNoIntercept, f3, d, contrasts = Dict(:x1p => EffectsCoding()))
@test predict(m2, d[2:4, :]) == predict(m2)[2:4]
@test predict(m3, d[2:4, :]) == predict(m3)[2:4]
Expand Down
10 changes: 5 additions & 5 deletions test/temporal_terms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ using DataStructures
@test isequal(pred[:, 3], [missing; missing; missing; 1.0:7])
@test isequal(pred[:, 4], fill(missing, 10))

@test coefnames(f)[2] == ["x_lag0", "x_lag1", "x_lag3", "x_lag11"]
@test coefnames(f)[2] == [:x_lag0, :x_lag1, :x_lag3, :x_lag11]
end

@testset "1 arg form" begin
Expand All @@ -26,7 +26,7 @@ using DataStructures
resp, pred = modelcols(f, df)

@test isequal(pred[:, 1], [missing; 1.0:9])
@test coefnames(f)[2] == "x_lag1"
@test coefnames(f)[2] == :x_lag1
end

@testset "Row Table" begin
Expand All @@ -53,7 +53,7 @@ using DataStructures
resp, pred = modelcols(neg_f, df);

@test isequal(pred[:, 1], [3.0:10; missing; missing])
@test coefnames(neg_f)[2] == "x_lag-2"
@test coefnames(neg_f)[2] == Symbol("x_lag-2")
end

@testset "Categorical Term use" begin
Expand All @@ -66,7 +66,7 @@ using DataStructures
@test isequal(pred[:, 1], [missing; missing; 0; 1])
@test isequal(pred[:, 2], [missing; missing; 0; 0])

@test coefnames(f)[2] == ["x: B_lag2", "x: C_lag2"]
@test coefnames(f)[2] == [Symbol("x: B_lag2"), Symbol("x: C_lag2")]
end

@testset "Diff Demo" begin
Expand Down Expand Up @@ -107,7 +107,7 @@ using DataStructures
@test isequal(pred[:, 3], [4.0:10; missing; missing; missing])
@test isequal(pred[:, 4], fill(missing, 10))

@test coefnames(f)[2] == ["x_lead0", "x_lead1", "x_lead3", "x_lead11"]
@test coefnames(f)[2] == [:x_lead0, :x_lead1, :x_lead3, :x_lead11]
end
end
end