JuliaStats · Tokazama · Jan 20, 2020 · kleinschmidt · Feb 13, 2020 · kleinschmidt
diff --git a/src/temporal_terms.jl b/src/temporal_terms.jl
@@ -54,7 +54,6 @@ function Base.show(io::IO, ll::LeadLagTerm{<:Any, F}) where F
     opname = string(nameof(F.instance))
     print(io, "$opname($(ll.term), $(ll.nsteps))")
 end
-function StatsBase.coefnames(ll::LeadLagTerm{<:Any, F}) where F
-    opname = string(nameof(F.instance))
-    coefnames(ll.term) .* "_$opname$(ll.nsteps)"
-end
+StatsBase.coefnames(ll::LeadLagTerm{<:Any, F}) where {F} = _llcoef(ll, coefnames(ll.term), String(nameof(F.instance)))
+_llcoef(ll::LeadLagTerm, t::Symbol, opname) = Symbol(t, '_', opname, ll.nsteps)  # one name: append "_<op><nsteps>"
+_llcoef(ll::LeadLagTerm, ts, opname) = [Symbol(t, '_', opname, ll.nsteps) for t in ts]  # several names: suffix each
diff --git a/src/terms.jl b/src/terms.jl
@@ -543,18 +543,48 @@ vectorize(x) = [x]
     coefnames(term::AbstractTerm)
 
 Return the name(s) of column(s) generated by a term.  Return value is either a
-`String` or an iterable of `String`s.
+`Symbol` or an iterable of `Symbol`s.
 """
+StatsBase.coefnames(t::Term) = t.sym  # a plain `Term` is named by its own symbol
 StatsBase.coefnames(t::FormulaTerm) = (coefnames(t.lhs), coefnames(t.rhs))
-StatsBase.coefnames(::InterceptTerm{H}) where {H} = H ? "(Intercept)" : []
-StatsBase.coefnames(t::ContinuousTerm) = string(t.sym)
-StatsBase.coefnames(t::CategoricalTerm) = 
-    ["$(t.sym): $name" for name in t.contrasts.termnames]
-StatsBase.coefnames(t::FunctionTerm) = string(t.exorig)
-StatsBase.coefnames(ts::TupleTerm) = reduce(vcat, coefnames.(ts))
+StatsBase.coefnames(::InterceptTerm{H}) where {H} = H ? :Intercept : Symbol[]  # no intercept => no names; typed empty keeps `vcat` results `Symbol`-typed
+StatsBase.coefnames(t::ContinuousTerm) = t.sym  # one column, named after the variable
+StatsBase.coefnames(t::CategoricalTerm) = [Symbol(t.sym, ": ", name) for name in t.contrasts.termnames]  # "<var>: <contrast name>"
+StatsBase.coefnames(t::FunctionTerm) = Symbol(t.exorig)  # printed form of the original expression
 StatsBase.coefnames(t::MatrixTerm) = mapreduce(coefnames, vcat, t.terms)
+# Interaction names: each component's names (wrapped by `vectorize`) are combined by
+# `kron_insideout`, joined with " & ", and the joined strings are converted to `Symbol`s
+# (see the method directly below).
 StatsBase.coefnames(t::InteractionTerm) =
-    kron_insideout((args...) -> join(args, " & "), vectorize.(coefnames.(t.terms))...)
+    Symbol.(kron_insideout((args...) -> join(args, " & "), vectorize.(coefnames.(t.terms))...))
+# A `TupleTerm` is itself the tuple of terms (it has no `.terms` field): name each element in order.
+StatsBase.coefnames(ts::TupleTerm) = _coefnames(ts)
+_coefnames(ts::Tuple) = map(coefnames, ts)  # `()` maps to `()`; otherwise one entry per term
+
+"""
+    coef(f::FormulaTerm, s::Symbol)
+
+Return the term of `f` whose `coefnames` is identical to `s`: the left-hand side if
+it matches, otherwise the first matching term found on the right-hand side.
+Throw an error when no term of `f` is named `s`.
+"""
+function StatsBase.coef(f::FormulaTerm, s::Symbol)
+    # `coefnames` (plural) is the accessor defined above; `_coef` yields `nothing` on a miss.
+    c = coefnames(f.lhs) === s ? f.lhs : _coef(f.rhs, s)
+    c === nothing && error("$s is not a coefficient within $f")
+    return c
+end
+# Return `t` itself when it is named `s`, otherwise `nothing`.
+_coef(t::AbstractTerm, s::Symbol) = coefnames(t) === s ? t : nothing
+# Search the terms held in `t.terms` and return the first one named `s`, or `nothing`.
+function _coef(t::MatrixTerm, s::Symbol)
+    for t_i in t.terms
+        coefnames(t_i) === s && return t_i
+    end
+    return nothing
+end
+
+
 
 ################################################################################
 # old Terms features:

diff --git a/test/contrasts.jl b/test/contrasts.jl
@@ -36,7 +36,7 @@
                                 1  0  0
                                 1  0  0
                                 1  1  0]
-    @test coefnames(mf) == ["(Intercept)"; "x: b"; "x: c"]
+    @test coefnames(mf) == [:Intercept; Symbol("x: b"); Symbol("x: c")]
 
     mmm = ModelMatrix(mf).m
     setcontrasts!(mf, x = DummyCoding())
@@ -49,7 +49,7 @@
                                 1 -1 -1
                                 1 -1 -1
                                 1  1  0]
-    @test coefnames(mf) == ["(Intercept)"; "x: b"; "x: c"]
+    @test coefnames(mf) == [:Intercept; Symbol("x: b"); Symbol("x: c")]
 
     # change base level of contrast
     setcontrasts!(mf, x = EffectsCoding(base = :b))
@@ -59,7 +59,7 @@
                                 1  1  0
                                 1  1  0
                                 1 -1 -1]
-    @test coefnames(mf) == ["(Intercept)"; "x: a"; "x: c"]
+    @test coefnames(mf) == [:Intercept; Symbol("x: a"); Symbol("x: c")]
 
     # change levels of contrast
     setcontrasts!(mf, x = EffectsCoding(levels = [:c, :b, :a]))
@@ -69,7 +69,7 @@
                                 1  0  1
                                 1  0  1
                                 1  1  0]
-    @test coefnames(mf) == ["(Intercept)"; "x: b"; "x: a"]
+    @test coefnames(mf) == [:Intercept; Symbol("x: b"); Symbol("x: a")]
 
 
     # change levels and base level of contrast
@@ -80,10 +80,10 @@
                                 1 -1 -1
                                 1 -1 -1
                                 1  0  1]
-    @test coefnames(mf) == ["(Intercept)"; "x: c"; "x: b"]
-    
+    @test coefnames(mf) == [:Intercept; Symbol("x: c"); Symbol("x: b")]
+
     # respect order of levels
-    
+
     data = DataFrame(x = levels!(categorical(['A', 'B', 'C', 'C', 'D']), ['C', 'B', 'A', 'D']))
     f = apply_schema(@formula(x ~ 1), schema(data))
     @test modelcols(f.lhs, data) == [0 1 0; 1 0 0; 0 0 0; 0 0 0; 0 0 1]
@@ -96,7 +96,7 @@
                                 1 -1 -1
                                 1 -1 -1
                                 1  1 -1]
-    @test coefnames(mf) == ["(Intercept)"; "x: b"; "x: c"]
+    @test coefnames(mf) == [:Intercept; Symbol("x: b"); Symbol("x: c")]
 
     # Mismatching types of data and contrasts levels throws an error:
     @test_throws ArgumentError setcontrasts!(mf, x = EffectsCoding(levels = ["a", "b", "c"]))
@@ -111,7 +111,7 @@
                                         1 -1
                                         1 -1
                                         1  1]
-    @test coefnames(mf_missing) == ["(Intercept)"; "x: b"]
+    @test coefnames(mf_missing) == [:Intercept; Symbol("x: b")]
 
     # Things that are bad to do:
     # Applying contrasts that only have a subset of data levels:

diff --git a/test/modelmatrix.jl b/test/modelmatrix.jl
@@ -21,7 +21,7 @@
     x4 = [17.:20;]
     f = @formula(y ~ 1 + x1 + x2)
     mf = ModelFrame(f, d)
-    @test coefnames(mf) == ["(Intercept)","x1","x2"]
+    @test coefnames(mf) == [:Intercept, :x1, :x2]
     @test response(mf) == [1:4;]
     mm = ModelMatrix(mf)
     smm = ModelMatrix{sparsetype}(mf)
@@ -45,7 +45,7 @@
     @test mm.m[:,2] == [0, 1., 0, 0]
     @test mm.m[:,3] == [0, 0, 1., 0]
     @test mm.m[:,4] == [0, 0, 0, 1.]
-    @test coefnames(mf)[2:end] == ["x1p: 6", "x1p: 7", "x1p: 8"]
+    @test coefnames(mf)[2:end] == [Symbol("x1p: 6"), Symbol("x1p: 7"), Symbol("x1p: 8")]
     @test mm.m == ModelMatrix{sparsetype}(mf).m
 
     #test_group("Creating a model matrix using full formulas: y => x1 + x2, etc")
@@ -176,15 +176,15 @@
                        1 0
                        0 1]
         @test mm.m == ModelMatrix{sparsetype}(mf).m
-        @test coefnames(mf) == ["x: a", "x: b"]
+        @test coefnames(mf) == [Symbol("x: a"), Symbol("x: b")]
 
         ## promotion blocked when we block default model=StatisticalModel
         mf = ModelFrame(@formula(n ~ 0 + x), d, model=Nothing, contrasts=cs)
         mm = ModelMatrix(mf)
         @test all(mm.m .== ifelse.(d.x .== :a, -1, 1))
-        @test coefnames(mf) == ["x: b"]
-        
-    
+        @test coefnames(mf) == [Symbol("x: b")]
+
+
         ## No first-order term for interaction
         mf = ModelFrame(@formula(n ~ 1 + x + x&y), d, contrasts=cs)
         mm = ModelMatrix(mf)
@@ -197,8 +197,8 @@
                                  -1  1  0
                                  1  0  1]
         @test mm.m == ModelMatrix{sparsetype}(mf).m
-        @test coefnames(mf) == ["(Intercept)", "x: b", "x: a & y: d", "x: b & y: d"]
-    
+        @test coefnames(mf) == [:Intercept, Symbol("x: b"), Symbol("x: a & y: d"), Symbol("x: b & y: d")]
+
         ## When both terms of interaction are non-redundant:
         mf = ModelFrame(@formula(n ~ 0 + x&y), d, contrasts=cs)
         mm = ModelMatrix(mf)
@@ -211,8 +211,8 @@
                        0 0 1 0
                        0 0 0 1]
         @test mm.m == ModelMatrix{sparsetype}(mf).m
-        @test coefnames(mf) == ["x: a & y: c", "x: b & y: c",
-                                "x: a & y: d", "x: b & y: d"]
+        @test coefnames(mf) == [Symbol("x: a & y: c"), Symbol("x: b & y: c"),
+                                Symbol("x: a & y: d"), Symbol("x: b & y: d")]
 
         # only a three-way interaction: every term is promoted.
         mf = ModelFrame(@formula(n ~ 0 + x&y&z), d, contrasts=cs)
@@ -235,9 +235,9 @@
                        0 0 1 0  1  0
                        0 0 0 1  0  1]
         @test mm.m == ModelMatrix{sparsetype}(mf).m
-        @test coefnames(mf) == ["x: a & y: c", "x: b & y: c",
-                                "x: a & y: d", "x: b & y: d",
-                                "x: a & z: f", "x: b & z: f"]
+        @test coefnames(mf) == [Symbol("x: a & y: c"), Symbol("x: b & y: c"),
+                                Symbol("x: a & y: d"), Symbol("x: b & y: d"),
+                                Symbol("x: a & z: f"), Symbol("x: b & z: f")]
 
         # ...and adding a three-way interaction, only the shared term (x) is promoted.
         # this is because dropping x gives y&z which isn't present, but dropping y or z
@@ -253,10 +253,10 @@
                        0 0 1 0  1  0  1  0
                        0 0 0 1  0  1  0  1]
         @test mm.m == ModelMatrix{sparsetype}(mf).m
-        @test coefnames(mf) == ["x: a & y: c", "x: b & y: c",
-                                "x: a & y: d", "x: b & y: d",
-                                "x: a & z: f", "x: b & z: f",
-                                "x: a & y: d & z: f", "x: b & y: d & z: f"]
+        @test coefnames(mf) == [Symbol("x: a & y: c"), Symbol("x: b & y: c"),
+                                Symbol("x: a & y: d"), Symbol("x: b & y: d"),
+                                Symbol("x: a & z: f"), Symbol("x: b & z: f"),
+                                Symbol("x: a & y: d & z: f"), Symbol("x: b & y: d & z: f")]
 
         # two two-way interactions, with common lower-order term. the common term x is
         # promoted in both (along with lower-order term), because in every case, when
@@ -272,11 +272,11 @@
                        1 0  1  0  1  0
                        0 1  0  1  0  1]
         @test mm.m == ModelMatrix{sparsetype}(mf).m
-        @test coefnames(mf) == ["x: a", "x: b",
-                                "x: a & y: d", "x: b & y: d",
-                                "x: a & z: f", "x: b & z: f"]
-    
-    
+        @test coefnames(mf) == [Symbol("x: a"), Symbol("x: b"),
+                                Symbol("x: a & y: d"), Symbol("x: b & y: d"),
+                                Symbol("x: a & z: f"), Symbol("x: b & z: f")]
+
+
         ## FAILS: When both terms are non-redundant and intercept is PRESENT
         ## (not fully redundant). Ideally, would drop last column. Might make sense
         ## to warn about this, and suggest recoding x and y into a single variable.
@@ -285,8 +285,8 @@
                                            1 0 1 0
                                            1 0 0 1
                                            1 0 0 0]
-        @test_broken coefnames(mf) == ["x: a & y: c", "x: b & y: c",
-                                       "x: a & y: d", "x: b & y: d"]
+        @test_broken coefnames(mf) == [Symbol("x: a & y: c"), Symbol("x: b & y: c"),
+                                       Symbol("x: a & y: d"), Symbol("x: b & y: d")]
 
         ## note that R also does not detect this automatically. it's left to glm et al.
         ## to detect numerically when the model matrix is rank deficient, which is hard
@@ -304,18 +304,18 @@
     @testset "arbitrary functions in formulae" begin
         d = deepcopy(d_orig)
         mf = ModelFrame(@formula(y ~ log(x1)), d, model=Nothing)
-        @test coefnames(mf) == ["log(x1)"]
+        @test coefnames(mf) == [Symbol("log(x1)")]
         mm = ModelMatrix(mf)
         @test all(mm.m .== log.(x1))
 
         # | is not special in base formula:
         d = DataFrame(x = [1,2,3], y = [4,5,6])
         mf = ModelFrame(@formula(y ~ 1 + (1 | x)), d)
-        @test coefnames(mf) == ["(Intercept)", "1 | x"]
+        @test coefnames(mf) == [:Intercept, Symbol("1 | x")]
 
         mf = ModelFrame(@formula(y ~ 0 + (1 | x)), d)
         @test all(ModelMatrix(mf).m .== float.(1 .| d.x))
-        @test coefnames(mf) == ["1 | x"]
+        @test coefnames(mf) == [Symbol("1 | x")]
     end
 
 

diff --git a/test/statsmodel.jl b/test/statsmodel.jl
@@ -14,8 +14,8 @@ StatsBase.response(mod::DummyMod) = mod.y
 ## dumb coeftable: just prints the "beta" values
 StatsBase.coeftable(mod::DummyMod) =
     CoefTable(reshape(mod.beta, (size(mod.beta,1), 1)),
-              ["'beta' value"],
-              ["" for n in 1:size(mod.x,2)],
+              [Symbol("'beta' value")],
+              [Symbol("") for n in 1:size(mod.x,2)],
               0)
 # dumb predict: return values predicted by "beta" and dummy confidence bounds
 function StatsBase.predict(mod::DummyMod;
@@ -108,7 +108,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg)
     @test response(m) == Array(d.y)
 
     ## coefnames delegated to model frame by default
-    @test coefnames(m) == coefnames(ModelFrame(f, d)) == ["(Intercept)", "x1", "x2", "x1 & x2"]
+    @test coefnames(m) == coefnames(ModelFrame(f, d)) == [:Intercept, :x1, :x2, Symbol("x1 & x2")]
 
     ## test prediction method
     ## vanilla
@@ -138,7 +138,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg)
 
     ## test copying of names from Terms to CoefTable
     ct = coeftable(m)
-    @test ct.rownms == ["(Intercept)", "x1", "x2", "x1 & x2"]
+    @test ct.rownms == [:Intercept, :x1, :x2, Symbol("x1 & x2")]
 
     ## show with coeftable defined
     io = IOBuffer()
@@ -148,7 +148,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg)
     f2 = @formula(y ~ x1p)
     m2 = fit(DummyMod, f2, d)
 
-    @test coeftable(m2).rownms == ["(Intercept)", "x1p: 6", "x1p: 7", "x1p: 8"]
+    @test coeftable(m2).rownms == [:Intercept, Symbol("x1p: 6"), Symbol("x1p: 7"), Symbol("x1p: 8")]
 
     ## predict w/ new data missing levels
     @test predict(m2, d[2:4, :]) == predict(m2)[2:4]
@@ -190,7 +190,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg)
     m3 = fit(DummyModNoIntercept, f3, d)
     ct2 = coeftable(m2)
     ct3 = coeftable(m3)
-    @test ct3.rownms == ct2.rownms == ["x1", "x2", "x1 & x2"]
+    @test ct3.rownms == ct2.rownms == [:x1, :x2, Symbol("x1 & x2")]
     @test predict(m2, d[2:4, :]) == predict(m2)[2:4]
     @test predict(m3, d[2:4, :]) == predict(m3)[2:4]
 
@@ -202,7 +202,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg)
     m3 = fit(DummyModNoIntercept, f3, d)
     ct2 = coeftable(m2)
     ct3 = coeftable(m3)
-    @test ct2.rownms == ct3.rownms == ["x1p: 6", "x1p: 7", "x1p: 8"]
+    @test ct2.rownms == ct3.rownms == [Symbol("x1p: 6"), Symbol("x1p: 7"), Symbol("x1p: 8")]
     m4 = fit(DummyModNoIntercept, f3, d, contrasts = Dict(:x1p => EffectsCoding()))
     @test predict(m2, d[2:4, :]) == predict(m2)[2:4]
     @test predict(m3, d[2:4, :]) == predict(m3)[2:4]

diff --git a/test/temporal_terms.jl b/test/temporal_terms.jl
@@ -16,7 +16,7 @@ using DataStructures
             @test isequal(pred[:, 3], [missing; missing; missing; 1.0:7])
             @test isequal(pred[:, 4], fill(missing, 10))
 
-            @test coefnames(f)[2] == ["x_lag0", "x_lag1", "x_lag3", "x_lag11"]
+            @test coefnames(f)[2] == [:x_lag0, :x_lag1, :x_lag3, :x_lag11]
         end
 
         @testset "1 arg form" begin
@@ -26,7 +26,7 @@ using DataStructures
             resp, pred = modelcols(f, df)
 
             @test isequal(pred[:, 1], [missing; 1.0:9])
-            @test coefnames(f)[2] == "x_lag1"
+            @test coefnames(f)[2] == :x_lag1
         end
 
         @testset "Row Table" begin
@@ -53,7 +53,7 @@ using DataStructures
             resp, pred = modelcols(neg_f, df);
 
             @test isequal(pred[:, 1], [3.0:10; missing; missing])
-            @test coefnames(neg_f)[2] == "x_lag-2"
+            @test coefnames(neg_f)[2] == Symbol("x_lag-2")
         end
 
         @testset "Categorical Term use" begin
@@ -66,7 +66,7 @@ using DataStructures
             @test isequal(pred[:, 1], [missing; missing; 0; 1])
             @test isequal(pred[:, 2], [missing; missing; 0; 0])
 
-            @test coefnames(f)[2] == ["x: B_lag2", "x: C_lag2"]
+            @test coefnames(f)[2] == [Symbol("x: B_lag2"), Symbol("x: C_lag2")]
         end
 
         @testset "Diff Demo" begin
@@ -107,7 +107,7 @@ using DataStructures
             @test isequal(pred[:, 3], [4.0:10; missing; missing; missing])
             @test isequal(pred[:, 4], fill(missing, 10))
 
-            @test coefnames(f)[2] == ["x_lead0", "x_lead1", "x_lead3", "x_lead11"]
+            @test coefnames(f)[2] == [:x_lead0, :x_lead1, :x_lead3, :x_lead11]
         end
     end
 end