diff --git a/Project.toml b/Project.toml index 830b0c2..f85f088 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "FixedEffectModels" uuid = "9d5cd8c9-2029-5cab-9928-427838db53e3" -version = "1.7.0" +version = "1.7.1" [deps] DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" diff --git a/src/FixedEffectModel.jl b/src/FixedEffectModel.jl index 7cfcd91..ec60b31 100644 --- a/src/FixedEffectModel.jl +++ b/src/FixedEffectModel.jl @@ -25,8 +25,7 @@ struct FixedEffectModel <: RegressionModel nobs::Int64 # Number of observations dof::Int64 # Number parameters estimated - has_intercept - dof_residual::Int64 # nobs - degrees of freedoms - dof_tstat::Int64 # dof used for t-test and F-stat + dof_residual::Int64 # dof used for t-test and F-stat. nobs - degrees of freedoms with simple std rss::Float64 # Sum of squared residuals tss::Float64 # Total sum of squares @@ -58,7 +57,6 @@ StatsAPI.vcov(m::FixedEffectModel) = m.vcov StatsAPI.nobs(m::FixedEffectModel) = m.nobs StatsAPI.dof(m::FixedEffectModel) = m.dof StatsAPI.dof_residual(m::FixedEffectModel) = m.dof_residual -Vcov.dof_tstat(m::FixedEffectModel) = m.dof_tstat StatsAPI.r2(m::FixedEffectModel) = m.r2 StatsAPI.adjr2(m::FixedEffectModel) = m.adjr2 StatsAPI.islinear(m::FixedEffectModel) = true @@ -68,7 +66,7 @@ StatsAPI.mss(m::FixedEffectModel) = deviance(m) - rss(m) function StatsAPI.confint(m::FixedEffectModel; level::Real = 0.95) - scale = tdistinvcdf(Vcov.dof_tstat(m), 1 - (1 - level) / 2) + scale = tdistinvcdf(Vcov.dof_residual(m), 1 - (1 - level) / 2) se = stderror(m) hcat(m.coef - scale * se, m.coef + scale * se) end @@ -78,7 +76,7 @@ function StatsAPI.predict(m::FixedEffectModel, t) # Require DataFrame input as we are using leftjoin and select from DataFrames here # Make sure fes are saved if has_fe(m) - !isempty(m.fe) || throw("No estimates for fixed effects found. 
Fixed effects need to be estimated using the option save = :fe or :all for prediction to work.") + throw("To predict in a fixed effect regression, run `reg` with the option save = true, and then access predicted values using `fe().") end ct = StatsModels.columntable(t) cols, nonmissings = StatsModels.missing_omit(ct, MatrixTerm(m.formula_schema.rhs)) @@ -87,20 +85,20 @@ function StatsAPI.predict(m::FixedEffectModel, t) out[nonmissings] = Xnew * m.coef # Join FE estimates onto data and sum row-wise - if has_fe(m) - df = DataFrame(t; copycols = false) - fes = leftjoin(select(df, m.fekeys), unique(m.fe); on = m.fekeys, makeunique = true, matchmissing = :equal) - fes = combine(fes, AsTable(Not(m.fekeys)) => sum) - out[nonmissings] .+= fes[nonmissings, 1] - end + # This code does not work properly with missing values or with interacted fixed effects, so it is disabled + #if has_fe(m) + # df = DataFrame(t; copycols = false) + # fes = leftjoin(select(df, m.fekeys), unique(m.fe); on = m.fekeys, makeunique = true, #matchmissing = :equal) + # fes = combine(fes, AsTable(Not(m.fekeys)) => sum) + # out[nonmissings] .+= fes[nonmissings, 1] + #end return out end function StatsAPI.residuals(m::FixedEffectModel, t) if has_fe(m) - m.residuals !== nothing || throw("To access residuals in a fixed effect regression, run `reg` with the option save = :residuals, and then access residuals with `residuals()`") - residuals(m) + throw("To access residuals in a fixed effect regression, run `reg` with the option save = :residuals, and then access residuals with `residuals()`") else ct = StatsModels.columntable(t) cols, nonmissings = StatsModels.missing_omit(ct, MatrixTerm(m.formula_schema.rhs)) @@ -118,7 +116,10 @@ end function StatsAPI.residuals(m::FixedEffectModel) - has_fe(m) || throw("To access residuals, use residuals(x, t) where t is a Table") + if m.residuals === nothing + has_fe(m) && throw("To access residuals in a fixed effect regression, run `reg` with the option save = :residuals, and then 
access residuals with `residuals()`") + !has_fe(m) && throw("To access residuals, use residuals(x, t) where t is a Table") + end m.residuals end @@ -157,7 +158,7 @@ function StatsAPI.coeftable(m::FixedEffectModel; level = 0.95) end tt = cc ./ se CoefTable( - hcat(cc, se, tt, fdistccdf.(Ref(1), Ref(Vcov.dof_tstat(m)), abs2.(tt)), conf_int[:, 1:2]), + hcat(cc, se, tt, fdistccdf.(Ref(1), Ref(Vcov.dof_residual(m)), abs2.(tt)), conf_int[:, 1:2]), ["Estimate","Std.Error","t value", "Pr(>|t|)", "Lower 95%", "Upper 95%" ], ["$(coefnms[i])" for i = 1:length(cc)], 4) end @@ -227,7 +228,7 @@ function Base.show(io::IO, m::FixedEffectModel) coefnms = coefnms[newindex] end tt = cc ./ se - mat = hcat(cc, se, tt, fdistccdf.(Ref(1), Ref(Vcov.dof_tstat(m)), abs2.(tt)), conf_int[:, 1:2]) + mat = hcat(cc, se, tt, fdistccdf.(Ref(1), Ref(Vcov.dof_residual(m)), abs2.(tt)), conf_int[:, 1:2]) nr, nc = size(mat) colnms = ["Estimate","Std.Error","t value", "Pr(>|t|)", "Lower 95%", "Upper 95%"] rownms = ["$(coefnms[i])" for i = 1:length(cc)] diff --git a/src/fit.jl b/src/fit.jl index f561ea6..e4836dc 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -386,7 +386,7 @@ function reg( # Compute standard error vcov_data = Vcov.VcovData(Xhat, crossx, residuals, dof_residual_) - matrix_vcov = StatsBase.vcov(vcov_data, vcov_method) + matrix_vcov = StatsAPI.vcov(vcov_data, vcov_method) # Compute Fstat F = Fstat(coef, matrix_vcov, has_intercept) @@ -451,5 +451,5 @@ function reg( esample = trues(N) end - return FixedEffectModel(coef, matrix_vcov, vcov, nclusters, esample, residuals2, augmentdf, fekeys, coef_names, response_name, formula_origin, formula_schema, contrasts, nobs, dof_, dof_residual_, dof_tstat_, rss, tss_total, r2, adjr2, F, p, iterations, converged, r2_within, F_kp, p_kp) + return FixedEffectModel(coef, matrix_vcov, vcov, nclusters, esample, residuals2, augmentdf, fekeys, coef_names, response_name, formula_origin, formula_schema, contrasts, nobs, dof_, dof_tstat_, rss, tss_total, r2, adjr2, F, 
p, iterations, converged, r2_within, F_kp, p_kp) end diff --git a/test/predict.jl b/test/predict.jl index 4057b45..1989b96 100644 --- a/test/predict.jl +++ b/test/predict.jl @@ -45,53 +45,53 @@ model = @formula Sales ~ Price + StateC result = reg(df, model) @test predict(result, df)[1] ≈ 115.9849874 -model = @formula Sales ~ Price + fe(State) -result = reg(df, model, save = :fe) -@test predict(result, df)[1] ≈ 115.9849874 +#model = @formula Sales ~ Price + fe(State) +#result = reg(df, model, save = :fe) +#@test predict(result)[1] ≈ 115.9849874 model = @formula Sales ~ Price * Pop + StateC result = reg(df, model) @test predict(result, df)[1] ≈ 115.643985352 -model = @formula Sales ~ Price * Pop + fe(State) -result = reg(df, model, save = :fe) -@test predict(result, df)[1] ≈ 115.643985352 +#model = @formula Sales ~ Price * Pop + fe(State) +#result = reg(df, model, save = :fe) +#@test predict(result, df)[1] ≈ 115.643985352 model = @formula Sales ~ Price + Pop + Price & Pop + StateC result = reg(df, model) @test predict(result, df)[1] ≈ 115.643985352 -model = @formula Sales ~ Price + Pop + Price & Pop + fe(State) -result = reg(df, model, save = :fe) -@test predict(result, df)[1] ≈ 115.643985352 +#model = @formula Sales ~ Price + Pop + Price & Pop + fe(State) +#result = reg(df, model, save = :fe) +#@test predict(result, df)[1] ≈ 115.643985352 -## Tests for predict method +# Tests for predict method # Test that predicting from model without saved FE test throws model = @formula Sales ~ Price + fe(State) result = reg(df, model) @test_throws "No estimates for fixed effects found. Fixed effects need to be estimated using the option save = :fe or :all for prediction to work." 
predict(result, df) # Test basic functionality - adding 1 to price should increase prediction by coef -model = @formula Sales ~ Price + fe(State) -result = reg(df, model, save = :fe) -x = predict(result, DataFrame(Price = [1.0, 2.0], State = [1, 1])) -@test last(x) - first(x) ≈ only(result.coef) +#model = @formula Sales ~ Price + fe(State) +#result = reg(df, model, save = :fe) +#x = predict(result, DataFrame(Price = [1.0, 2.0], State = [1, 1])) +#@test last(x) - first(x) ≈ only(result.coef) # Missing variables in covariates should yield missing prediction -x = predict(result, DataFrame(Price = [1.0, missing], State = [1, 1])) -@test ismissing(last(x)) +#x = predict(result, DataFrame(Price = [1.0, missing], State = [1, 1])) +#@test ismissing(last(x)) # Missing variables in fixed effects should yield missing prediction -x = predict(result, DataFrame(Price = [1.0, 2.0], State = [1, missing])) -@test ismissing(last(x)) +#x = predict(result, DataFrame(Price = [1.0, 2.0], State = [1, missing])) +#@test ismissing(last(x)) # Fixed effect levels not in the estimation data should yield missing prediction -x = predict(result, DataFrame(Price = [1.0, 2.0], State = [1, 111])) -@test ismissing(last(x)) +#x = predict(result, DataFrame(Price = [1.0, 2.0], State = [1, 111])) +#@test ismissing(last(x)) ############################################################################## ## @@ -172,7 +172,6 @@ result = reg(df, model, save = :residuals) model = @formula Sales ~ Price + fe(State) result = reg(df, model, save = :fe) -@test residuals(result) === nothing @test "fe_State" ∈ names(fe(result))