diff --git a/src/temporal_terms.jl b/src/temporal_terms.jl index 5a29ee24..c50c4748 100644 --- a/src/temporal_terms.jl +++ b/src/temporal_terms.jl @@ -54,7 +54,6 @@ function Base.show(io::IO, ll::LeadLagTerm{<:Any, F}) where F opname = string(nameof(F.instance)) print(io, "$opname($(ll.term), $(ll.nsteps))") end -function StatsBase.coefnames(ll::LeadLagTerm{<:Any, F}) where F - opname = string(nameof(F.instance)) - coefnames(ll.term) .* "_$opname$(ll.nsteps)" -end +StatsBase.coefnames(ll::LeadLagTerm{<:Any, F}) where F = _llcoef(ll, coefnames(ll.term), string(nameof(F.instance))) +_llcoef(ll::LeadLagTerm, t::Symbol, opname) = Symbol(t, "_$opname$(ll.nsteps)") +_llcoef(ll::LeadLagTerm, ts, opname) = [Symbol(t, "_$opname$(ll.nsteps)") for t in ts] diff --git a/src/terms.jl b/src/terms.jl index 2ee5f785..259f7893 100644 --- a/src/terms.jl +++ b/src/terms.jl @@ -543,18 +543,48 @@ vectorize(x) = [x] coefnames(term::AbstractTerm) Return the name(s) of column(s) generated by a term. Return value is either a -`String` or an iterable of `String`s. +`Symbol` or an iterable of `Symbol`s. """ +StatsBase.coefnames(t::Term) = t.sym StatsBase.coefnames(t::FormulaTerm) = (coefnames(t.lhs), coefnames(t.rhs)) -StatsBase.coefnames(::InterceptTerm{H}) where {H} = H ? "(Intercept)" : [] -StatsBase.coefnames(t::ContinuousTerm) = string(t.sym) -StatsBase.coefnames(t::CategoricalTerm) = - ["$(t.sym): $name" for name in t.contrasts.termnames] -StatsBase.coefnames(t::FunctionTerm) = string(t.exorig) -StatsBase.coefnames(ts::TupleTerm) = reduce(vcat, coefnames.(ts)) +StatsBase.coefnames(::InterceptTerm{H}) where {H} = H ? 
Symbol(:Intercept) : [] # this seems like the wrong thing to return +StatsBase.coefnames(t::ContinuousTerm) = t.sym +StatsBase.coefnames(t::CategoricalTerm) = [Symbol("$(t.sym): $name") for name in t.contrasts.termnames] +StatsBase.coefnames(t::FunctionTerm) = Symbol(string(t.exorig)) StatsBase.coefnames(t::MatrixTerm) = mapreduce(coefnames, vcat, t.terms) +#function StatsBase.coefnames(t::InteractionTerm) +# Symbol.(kron_insideout((args...) -> join(args, " & "), vectorize.(coefnames.(t.terms))...)) +#end StatsBase.coefnames(t::InteractionTerm) = - kron_insideout((args...) -> join(args, " & "), vectorize.(coefnames.(t.terms))...) + Symbol.(kron_insideout((args...) -> join(args, " & "), vectorize.(coefnames.(t.terms))...)) +StatsBase.coefnames(ts::TupleTerm) = _coefnames(ts) +_coefnames(ts::Tuple) = (coefnames(first(ts)), _coefnames(tail(ts))...) +_coefnames(ts::Tuple{}) = () + +""" + coef(term::AbstractTerm, s::Symbol) +""" +function StatsBase.coef(f::FormulaTerm, s::Symbol) + if coefnames(f.lhs) === s + c = f.lhs + else + c = _coef(f.rhs, s) + end + if c isa AbstractTerm + return c + else + error("$c is not a coefficient within $f") + end +end +_coef(t::AbstractTerm, s::Symbol) = coefnames(t) === s ? 
t : false +function _coef(t::MatrixTerm, s::Symbol) + for t_i in t.terms + coefnames(t_i) === s && return t_i + end + return false +end + + ################################################################################ # old Terms features: diff --git a/test/contrasts.jl b/test/contrasts.jl index db744a64..7ae7cb55 100644 --- a/test/contrasts.jl +++ b/test/contrasts.jl @@ -36,7 +36,7 @@ 1 0 0 1 0 0 1 1 0] - @test coefnames(mf) == ["(Intercept)"; "x: b"; "x: c"] + @test coefnames(mf) == [:Intercept; Symbol("x: b"); Symbol("x: c")] mmm = ModelMatrix(mf).m setcontrasts!(mf, x = DummyCoding()) @@ -49,7 +49,7 @@ 1 -1 -1 1 -1 -1 1 1 0] - @test coefnames(mf) == ["(Intercept)"; "x: b"; "x: c"] + @test coefnames(mf) == [:Intercept; Symbol("x: b"); Symbol("x: c")] # change base level of contrast setcontrasts!(mf, x = EffectsCoding(base = :b)) @@ -59,7 +59,7 @@ 1 1 0 1 1 0 1 -1 -1] - @test coefnames(mf) == ["(Intercept)"; "x: a"; "x: c"] + @test coefnames(mf) == [:Intercept; Symbol("x: a"); Symbol("x: c")] # change levels of contrast setcontrasts!(mf, x = EffectsCoding(levels = [:c, :b, :a])) @@ -69,7 +69,7 @@ 1 0 1 1 0 1 1 1 0] - @test coefnames(mf) == ["(Intercept)"; "x: b"; "x: a"] + @test coefnames(mf) == [:Intercept; Symbol("x: b"); Symbol("x: a")] # change levels and base level of contrast @@ -80,10 +80,10 @@ 1 -1 -1 1 -1 -1 1 0 1] - @test coefnames(mf) == ["(Intercept)"; "x: c"; "x: b"] - + @test coefnames(mf) == [:Intercept; Symbol("x: c"); Symbol("x: b")] + # respect order of levels - + data = DataFrame(x = levels!(categorical(['A', 'B', 'C', 'C', 'D']), ['C', 'B', 'A', 'D'])) f = apply_schema(@formula(x ~ 1), schema(data)) @test modelcols(f.lhs, data) == [0 1 0; 1 0 0; 0 0 0; 0 0 0; 0 0 1] @@ -96,7 +96,7 @@ 1 -1 -1 1 -1 -1 1 1 -1] - @test coefnames(mf) == ["(Intercept)"; "x: b"; "x: c"] + @test coefnames(mf) == [:Intercept; Symbol("x: b"); Symbol("x: c")] # Mismatching types of data and contrasts levels throws an error: @test_throws ArgumentError setcontrasts!(mf, x = 
EffectsCoding(levels = ["a", "b", "c"])) @@ -111,7 +111,7 @@ 1 -1 1 -1 1 1] - @test coefnames(mf_missing) == ["(Intercept)"; "x: b"] + @test coefnames(mf_missing) == [:Intercept; Symbol("x: b")] # Things that are bad to do: # Applying contrasts that only have a subset of data levels: diff --git a/test/modelmatrix.jl b/test/modelmatrix.jl index 557d4c1e..83a2a6a6 100644 --- a/test/modelmatrix.jl +++ b/test/modelmatrix.jl @@ -21,7 +21,7 @@ x4 = [17.:20;] f = @formula(y ~ 1 + x1 + x2) mf = ModelFrame(f, d) - @test coefnames(mf) == ["(Intercept)","x1","x2"] + @test coefnames(mf) == [:Intercept, :x1, :x2] @test response(mf) == [1:4;] mm = ModelMatrix(mf) smm = ModelMatrix{sparsetype}(mf) @@ -45,7 +45,7 @@ @test mm.m[:,2] == [0, 1., 0, 0] @test mm.m[:,3] == [0, 0, 1., 0] @test mm.m[:,4] == [0, 0, 0, 1.] - @test coefnames(mf)[2:end] == ["x1p: 6", "x1p: 7", "x1p: 8"] + @test coefnames(mf)[2:end] == [Symbol("x1p: 6"), Symbol("x1p: 7"), Symbol("x1p: 8")] @test mm.m == ModelMatrix{sparsetype}(mf).m #test_group("Creating a model matrix using full formulas: y => x1 + x2, etc") @@ -176,15 +176,15 @@ 1 0 0 1] @test mm.m == ModelMatrix{sparsetype}(mf).m - @test coefnames(mf) == ["x: a", "x: b"] + @test coefnames(mf) == [Symbol("x: a"), Symbol("x: b")] ## promotion blocked when we block default model=StatisticalModel mf = ModelFrame(@formula(n ~ 0 + x), d, model=Nothing, contrasts=cs) mm = ModelMatrix(mf) @test all(mm.m .== ifelse.(d.x .== :a, -1, 1)) - @test coefnames(mf) == ["x: b"] - - + @test coefnames(mf) == [Symbol("x: b")] + + ## No first-order term for interaction mf = ModelFrame(@formula(n ~ 1 + x + x&y), d, contrasts=cs) mm = ModelMatrix(mf) @@ -197,8 +197,8 @@ -1 1 0 1 0 1] @test mm.m == ModelMatrix{sparsetype}(mf).m - @test coefnames(mf) == ["(Intercept)", "x: b", "x: a & y: d", "x: b & y: d"] - + @test coefnames(mf) == [:Intercept, Symbol("x: b"), Symbol("x: a & y: d"), Symbol("x: b & y: d")] + ## When both terms of interaction are non-redundant: mf = 
ModelFrame(@formula(n ~ 0 + x&y), d, contrasts=cs) mm = ModelMatrix(mf) @@ -211,8 +211,8 @@ 0 0 1 0 0 0 0 1] @test mm.m == ModelMatrix{sparsetype}(mf).m - @test coefnames(mf) == ["x: a & y: c", "x: b & y: c", - "x: a & y: d", "x: b & y: d"] + @test coefnames(mf) == [Symbol("x: a & y: c"), Symbol("x: b & y: c"), + Symbol("x: a & y: d"), Symbol("x: b & y: d")] # only a three-way interaction: every term is promoted. mf = ModelFrame(@formula(n ~ 0 + x&y&z), d, contrasts=cs) @@ -235,9 +235,9 @@ 0 0 1 0 1 0 0 0 0 1 0 1] @test mm.m == ModelMatrix{sparsetype}(mf).m - @test coefnames(mf) == ["x: a & y: c", "x: b & y: c", - "x: a & y: d", "x: b & y: d", - "x: a & z: f", "x: b & z: f"] + @test coefnames(mf) == [Symbol("x: a & y: c"), Symbol("x: b & y: c"), + Symbol("x: a & y: d"), Symbol("x: b & y: d"), + Symbol("x: a & z: f"), Symbol("x: b & z: f")] # ...and adding a three-way interaction, only the shared term (x) is promoted. # this is because dropping x gives y&z which isn't present, but dropping y or z @@ -253,10 +253,10 @@ 0 0 1 0 1 0 1 0 0 0 0 1 0 1 0 1] @test mm.m == ModelMatrix{sparsetype}(mf).m - @test coefnames(mf) == ["x: a & y: c", "x: b & y: c", - "x: a & y: d", "x: b & y: d", - "x: a & z: f", "x: b & z: f", - "x: a & y: d & z: f", "x: b & y: d & z: f"] + @test coefnames(mf) == [Symbol("x: a & y: c"), Symbol("x: b & y: c"), + Symbol("x: a & y: d"), Symbol("x: b & y: d"), + Symbol("x: a & z: f"), Symbol("x: b & z: f"), + Symbol("x: a & y: d & z: f"), Symbol("x: b & y: d & z: f")] # two two-way interactions, with common lower-order term. 
the common term x is # promoted in both (along with lower-order term), because in every case, when @@ -272,11 +272,11 @@ 1 0 1 0 1 0 0 1 0 1 0 1] @test mm.m == ModelMatrix{sparsetype}(mf).m - @test coefnames(mf) == ["x: a", "x: b", - "x: a & y: d", "x: b & y: d", - "x: a & z: f", "x: b & z: f"] - - + @test coefnames(mf) == [Symbol("x: a"), Symbol("x: b"), + Symbol("x: a & y: d"), Symbol("x: b & y: d"), + Symbol("x: a & z: f"), Symbol("x: b & z: f")] + + ## FAILS: When both terms are non-redundant and intercept is PRESENT ## (not fully redundant). Ideally, would drop last column. Might make sense ## to warn about this, and suggest recoding x and y into a single variable. @@ -285,8 +285,8 @@ 1 0 1 0 1 0 0 1 1 0 0 0] - @test_broken coefnames(mf) == ["x: a & y: c", "x: b & y: c", - "x: a & y: d", "x: b & y: d"] + @test_broken coefnames(mf) == [Symbol("x: a & y: c"), Symbol("x: b & y: c"), + Symbol("x: a & y: d"), Symbol("x: b & y: d")] ## note that R also does not detect this automatically. it's left to glm et al. 
## to detect numerically when the model matrix is rank deficient, which is hard @@ -304,18 +304,18 @@ @testset "arbitrary functions in formulae" begin d = deepcopy(d_orig) mf = ModelFrame(@formula(y ~ log(x1)), d, model=Nothing) - @test coefnames(mf) == ["log(x1)"] + @test coefnames(mf) == [Symbol("log(x1)")] mm = ModelMatrix(mf) @test all(mm.m .== log.(x1)) # | is not special in base formula: d = DataFrame(x = [1,2,3], y = [4,5,6]) mf = ModelFrame(@formula(y ~ 1 + (1 | x)), d) - @test coefnames(mf) == ["(Intercept)", "1 | x"] + @test coefnames(mf) == [:Intercept, Symbol("1 | x")] mf = ModelFrame(@formula(y ~ 0 + (1 | x)), d) @test all(ModelMatrix(mf).m .== float.(1 .| d.x)) - @test coefnames(mf) == ["1 | x"] + @test coefnames(mf) == [Symbol("1 | x")] end diff --git a/test/statsmodel.jl b/test/statsmodel.jl index 95acf642..e7761741 100644 --- a/test/statsmodel.jl +++ b/test/statsmodel.jl @@ -14,8 +14,8 @@ StatsBase.response(mod::DummyMod) = mod.y ## dumb coeftable: just prints the "beta" values StatsBase.coeftable(mod::DummyMod) = CoefTable(reshape(mod.beta, (size(mod.beta,1), 1)), - ["'beta' value"], - ["" for n in 1:size(mod.x,2)], + [Symbol("'beta' value")], + [Symbol("") for n in 1:size(mod.x,2)], 0) # dumb predict: return values predicted by "beta" and dummy confidence bounds function StatsBase.predict(mod::DummyMod; @@ -108,7 +108,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg) @test response(m) == Array(d.y) ## coefnames delegated to model frame by default - @test coefnames(m) == coefnames(ModelFrame(f, d)) == ["(Intercept)", "x1", "x2", "x1 & x2"] + @test coefnames(m) == coefnames(ModelFrame(f, d)) == [:Intercept, :x1, :x2, Symbol("x1 & x2")] ## test prediction method ## vanilla @@ -138,7 +138,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg) ## test copying of names from Terms to CoefTable ct = coeftable(m) - @test ct.rownms == ["(Intercept)", "x1", "x2", "x1 & x2"] + @test ct.rownms == [:Intercept, :x1, :x2, Symbol("x1 & x2")] ## 
show with coeftable defined io = IOBuffer() @@ -148,7 +148,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg) f2 = @formula(y ~ x1p) m2 = fit(DummyMod, f2, d) - @test coeftable(m2).rownms == ["(Intercept)", "x1p: 6", "x1p: 7", "x1p: 8"] + @test coeftable(m2).rownms == [:Intercept, Symbol("x1p: 6"), Symbol("x1p: 7"), Symbol("x1p: 8")] ## predict w/ new data missing levels @test predict(m2, d[2:4, :]) == predict(m2)[2:4] @@ -190,7 +190,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg) m3 = fit(DummyModNoIntercept, f3, d) ct2 = coeftable(m2) ct3 = coeftable(m3) - @test ct3.rownms == ct2.rownms == ["x1", "x2", "x1 & x2"] + @test ct3.rownms == ct2.rownms == [:x1, :x2, Symbol("x1 & x2")] @test predict(m2, d[2:4, :]) == predict(m2)[2:4] @test predict(m3, d[2:4, :]) == predict(m3)[2:4] @@ -202,7 +202,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg) m3 = fit(DummyModNoIntercept, f3, d) ct2 = coeftable(m2) ct3 = coeftable(m3) - @test ct2.rownms == ct3.rownms == ["x1p: 6", "x1p: 7", "x1p: 8"] + @test ct2.rownms == ct3.rownms == [Symbol("x1p: 6"), Symbol("x1p: 7"), Symbol("x1p: 8")] m4 = fit(DummyModNoIntercept, f3, d, contrasts = Dict(:x1p => EffectsCoding())) @test predict(m2, d[2:4, :]) == predict(m2)[2:4] @test predict(m3, d[2:4, :]) == predict(m3)[2:4] diff --git a/test/temporal_terms.jl b/test/temporal_terms.jl index ef88b771..089a8ffe 100644 --- a/test/temporal_terms.jl +++ b/test/temporal_terms.jl @@ -16,7 +16,7 @@ using DataStructures @test isequal(pred[:, 3], [missing; missing; missing; 1.0:7]) @test isequal(pred[:, 4], fill(missing, 10)) - @test coefnames(f)[2] == ["x_lag0", "x_lag1", "x_lag3", "x_lag11"] + @test coefnames(f)[2] == [:x_lag0, :x_lag1, :x_lag3, :x_lag11] end @testset "1 arg form" begin @@ -26,7 +26,7 @@ using DataStructures resp, pred = modelcols(f, df) @test isequal(pred[:, 1], [missing; 1.0:9]) - @test coefnames(f)[2] == "x_lag1" + @test coefnames(f)[2] == :x_lag1 end @testset "Row Table" begin @@ -53,7 +53,7 @@ using 
DataStructures resp, pred = modelcols(neg_f, df); @test isequal(pred[:, 1], [3.0:10; missing; missing]) - @test coefnames(neg_f)[2] == "x_lag-2" + @test coefnames(neg_f)[2] == Symbol("x_lag-2") end @testset "Categorical Term use" begin @@ -66,7 +66,7 @@ using DataStructures @test isequal(pred[:, 1], [missing; missing; 0; 1]) @test isequal(pred[:, 2], [missing; missing; 0; 0]) - @test coefnames(f)[2] == ["x: B_lag2", "x: C_lag2"] + @test coefnames(f)[2] == [Symbol("x: B_lag2"), Symbol("x: C_lag2")] end @testset "Diff Demo" begin @@ -107,7 +107,7 @@ using DataStructures @test isequal(pred[:, 3], [4.0:10; missing; missing; missing]) @test isequal(pred[:, 4], fill(missing, 10)) - @test coefnames(f)[2] == ["x_lead0", "x_lead1", "x_lead3", "x_lead11"] + @test coefnames(f)[2] == [:x_lead0, :x_lead1, :x_lead3, :x_lead11] end end end