Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Calculate F-statistics for Tests of Between-Subjects Effects (Type III, ANOVA) #508

Draft
wants to merge 16 commits into
base: master
Choose a base branch
from
37 changes: 37 additions & 0 deletions data/rds1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
Subject Sequence Period Formulation Var
3 TR 1 T 225.95
1 RT 1 R 181.09
2 RT 1 R 114.48
4 RT 1 R 176.91
5 TR 1 T 147.01
6 TR 1 T 97.53
7 RT 1 R 146.60
8 TR 1 T 45.58
9 RT 1 R 109.20
10 RT 1 R 125.61
11 TR 1 T 92.26
12 RT 1 R 237.95
13 TR 1 T 145.46
14 TR 1 T 179.96
15 TR 1 T 173.86
16 RT 1 R 144.00
17 RT 1 R 185.10
18 TR 1 T 117.99
1 RT 2 T 210.14
2 RT 2 T 98.72
3 TR 2 R 241.09
4 RT 2 T 186.65
5 TR 2 R 139.56
6 TR 2 R 124.77
7 RT 2 T 137.62
8 TR 2 R 57.71
9 RT 2 T 139.36
10 RT 2 T 120.43
11 TR 2 R 116.10
12 RT 2 T 228.63
13 TR 2 R 165.09
14 TR 2 R 181.09
15 TR 2 R 206.66
16 RT 2 T 143.25
17 RT 2 T 192.22
18 TR 2 R 125.50
186 changes: 186 additions & 0 deletions src/ftest.jl
Original file line number Diff line number Diff line change
Expand Up @@ -228,3 +228,189 @@ function show(io::IO, ftr::FTestResult{N}) where N
end
print(io, '─'^totwidth)
end


##############################################
# Tests of Between-Subjects Effects
# Based on F-statistics
# L: an s×p matrix of full row rank whose rows are estimable functions; s ≥ 1 and p is the number of coefficients
"""
    mulαβαtinc!(θ, A, B)

Add `A * B * A'` to `θ` in place and return `θ`.
Only the upper triangle of `θ` is changed. `B` is assumed to be symmetric.
"""
function mulαβαtinc!(θ::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix)
    # Accumulate `A * B * A'` into the upper triangle of `θ` (entries `θ[m, n]` with
    # `m ≤ n`) and return `θ`. Entries below the diagonal are never touched.
    #
    # Expected sizes: `A` is s×p, `B` is p×p, `θ` is s×s.
    # Throws `DimensionMismatch` when the sizes are inconsistent and `ArgumentError`
    # when an argument does not use one-based indexing.
    #
    # The loops below index with `@inbounds`, so the shapes are validated first;
    # without these checks a mismatched argument would read or write out of bounds.
    Base.require_one_based_indexing(θ, A, B)
    s, p = size(A)
    size(B) == (p, p) || throw(DimensionMismatch(
        "B has size $(size(B)), expected ($p, $p) to match the columns of A"))
    size(θ) == (s, s) || throw(DimensionMismatch(
        "θ has size $(size(θ)), expected ($s, $s) to match the rows of A"))
    for j in 1:p, i in 1:p
        @inbounds Bij = B[i, j]
        for n in 1:s
            # B[i, j] * A[n, j] does not depend on m, so it is hoisted out of the inner loop
            @inbounds BijAnj = Bij * A[n, j]
            # m runs only up to n: upper triangle, walking down a column of θ
            @simd for m in 1:n
                @inbounds θ[m, n] += A[m, i] * BijAnj
            end
        end
    end
    return θ
end

# See SPSS (GLM/UNIANOVA) and SAS (PROC GLM) documentation
# https://www.ibm.com/docs/en/spss-statistics/29.0.0?topic=effects-tests-between-subjects
# L is a s×p matrix corresponding to plan-matrix of Factor
# p - number of columns - coefs number
# s - number of levels for this factor in the model
# For Example
# If you have model matrix with Intercept and two factors A and B with 3 and 4 levels
# with Dummy coding you will have:
#
# I A2 A3 B2 B3 B4
# 1 1 0 1 0 0
# 1 1 0 1 0 0
# 1 1 0 1 0 0
# 1 1 0 1 0 0
# 1 0 1 1 0 0
# 1 0 1 0 1 0
# 1 0 1 0 1 0
# 1 0 1 0 1 0
# 1 0 1 0 1 0
# 1 0 0 0 0 1
# 1 0 0 0 0 1
# 1 0 0 0 0 1
# 1 0 0 0 0 0
# 1 0 0 0 0 0
#
# Then you will have the following L matrix for the intercept:
#
# 1 0 0 0 0 0
#
# For A:
#
# 0 1 0 0 0 0
# 0 0 1 0 0 0
#
# For B:
#
# 0 0 0 1 0 0
# 0 0 0 0 1 0
# 0 0 0 0 0 1
#
"""
    lcontrast(obj, i::Int)

Return the L-contrast matrix for the `i`-th fixed-effect term of the model `obj`.
"""
function lcontrast(obj, i::Int)
    # Build the L matrix (rows × number of coefficients) for the `i`-th term of
    # `obj.formula.rhs.terms`.
    #
    # Arguments:
    #   obj - fitted model; must provide `obj.formula.rhs.terms` and `coef(obj)`
    #   i   - position of the term on the right-hand side of the formula
    # Returns a `Matrix{Float64}` with `length(coef(obj))` columns.
    # Raises an error (via `error`) when `i` is outside `1:length(terms)`.
    terms = obj.formula.rhs.terms
    n = length(terms)
    # Reject i < 1 as well as i > n; this also covers an empty term list.
    1 <= i <= n || error("Factor number out of range 1-$(n)")
    term = terms[i]
    p = length(coef(obj)) # number of coefs

    # Number of coefficient columns occupied by the terms preceding term `i`
    prev = 0
    for j in 1:(i - 1)
        prev += width(terms[j])
    end
    inds = (prev + 1):(prev + width(term))

    if term isa CategoricalTerm
        cm = term.contrasts.matrix
        nlev = size(cm, 1)
        mx = zeros(nlev - 1, p)
        # Each row is the difference between contrast row `r` and contrast row 1,
        # placed in the columns of this term (correct for zero-intercept model).
        for r in 2:nlev
            mx[r - 1, inds] .= view(cm, r, :) .- view(cm, 1, :)
        end
    else
        # One indicator row per coefficient column of the term
        # (unknown correctness for zero-intercept model).
        mx = zeros(length(inds), p)
        for (r, col) in enumerate(inds)
            mx[r, col] = 1
        end
    end
    return mx
end

# Display name of a model term; `typeiii` uses it for the row labels of its table.

# Generic term: the text form of its `sym` field.
function tname(t::AbstractTerm)
    return string(t.sym)
end

# Interaction: names of the component terms joined with " & ".
function tname(t::InteractionTerm)
    parts = map(tname, t.terms)
    return join(parts, " & ")
end

# Intercept: fixed label.
function tname(::InterceptTerm)
    return "(Intercept)"
end

"""
    typeiii(obj)

Calculate F-statistics for Tests of Between-Subjects Effects (Type III).
The sums of squares and mean squares are not calculated.

"""
function typeiii(obj)
    # Compute, for every term on the right-hand side of the model formula, the
    # F-statistic
    #     F = (L*B)' * pinv(L * V * L') * (L*B) / rank(L)
    # where B = coef(obj), V = vcov(obj) and L = lcontrast(obj, i).
    #
    # Returns a `CoefTable` with the columns "DF/DDF" (tuple of rank(L) and
    # dof_residual(obj)), "F" and "Pr(>F)", one row per term, labelled with
    # `tname(term)`. A dropped intercept (`InterceptTerm{false}`) gets no row.

    # Work on a copy: the matrix returned by `vcov` must not be modified in place.
    # Some values can be NaN - they are replaced with zero.
    V = replace!(copy(vcov(obj)), NaN => 0)
    B = coef(obj)
    terms = obj.formula.rhs.terms
    ddf = dof_residual(obj)

    fac = String[]
    F = Float64[]
    df = Tuple{Float64, Float64}[]
    pval = Float64[]

    for (i, term) in enumerate(terms)
        # Zero intercept: nothing to test, skip before building L
        term isa InterceptTerm{false} && continue

        L = lcontrast(obj, i)
        # For coefficients that are zero (or NaN) the matching column of L is
        # cleared, which reduces the rank of the L matrix.
        for k in eachindex(B)
            if isnan(B[k]) || iszero(B[k])
                L[:, k] .= 0
            end
        end
        RL = rank(L)

        # θ = L * V * L'; as V is symmetric only the upper triangle is computed
        # and `Symmetric` supplies the rest.
        θ = zeros(size(L, 1), size(L, 1))
        mulαβαtinc!(θ, L, V)
        LB = L * B
        Fi = dot(LB, pinv(Symmetric(θ)), LB) / RL

        dfi = (Float64(RL), Float64(ddf))
        # With zero numerator degrees of freedom the p-value is undefined
        pi = iszero(dfi[1]) ? NaN : ccdf(FDist(dfi[1], dfi[2]), Fi)

        push!(fac, tname(term))
        push!(F, Fi)
        push!(df, dfi)
        push!(pval, pi)
    end
    return CoefTable([df, F, pval], ["DF/DDF", "F", "Pr(>F)"], fac, 3, 2)
end
57 changes: 57 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1837,3 +1837,60 @@ end
# 3. 44 / wt == y
@test GLM.loglik_obs(Binomial(), y, μ, wt, ϕ) ≈ GLM.logpdf(Binomial(Int(wt), μ), 44)
end

# Data set used by the Type III tests; Period and Subject are treated as factors.
berds1 = CSV.read(joinpath(glm_datadir, "rds1.csv"), DataFrame)
for col in (:Period, :Subject)
    berds1[!, col] = categorical(berds1[!, col])
end

@testset "Tests of Between-Subjects Effects" begin
    # This is not a BE model - no subject
    # Reference values: SPSS 28
    #=
    GLM Var BY Sequence Period Formulation
    /METHOD=SSTYPE(3)
    /INTERCEPT=INCLUDE
    /PRINT PARAMETER
    /CRITERIA=ALPHA(.05)
    /DESIGN=Sequence Period Formulation.
    =#
    tol = 1.0E-6
    # Expected F-statistics and p-values, in formula order:
    # Sequence, Period, Formulation
    fref = (1.011001, 0.328551, 0.106973)
    pref = (0.322206, 0.570520, 0.745747)

    # Basic model; row 1 of the table is the intercept, which is not tested
    ols = lm(@formula(Var ~ Sequence + Period + Formulation), berds1)
    tbl = GLM.typeiii(ols)
    for (col, ref) in ((2, fref), (3, pref)), (k, val) in enumerate(ref)
        @test tbl.cols[col][k + 1] ≈ val atol = tol
    end
    #=
    GLM Var BY Sequence Period Formulation
    /METHOD=SSTYPE(3)
    /INTERCEPT=EXCLUDE
    /PRINT PARAMETER
    /CRITERIA=ALPHA(.05)
    /DESIGN=Sequence Period Formulation.
    =#

    # Zero intercept; the table has no intercept row, so the rows start at 1
    ols = lm(@formula(Var ~ 0 + Sequence + Period + Formulation), berds1)
    tbl = GLM.typeiii(ols)
    for (col, ref) in ((2, fref), (3, pref)), (k, val) in enumerate(ref)
        @test tbl.cols[col][k] ≈ val atol = tol
    end

    # Crossed factors
    ols = lm(@formula(Var ~ 1 + Sequence & Period), berds1)
    tbl = GLM.typeiii(ols)
    for (col, val) in ((2, 0.482175), (3, 0.696996))
        @test tbl.cols[col][2] ≈ val atol = tol
    end

    # Crossed factors (zero - intercept)
    ols = lm(@formula(Var ~ 0 + Sequence & Period), berds1)
    tbl = GLM.typeiii(ols)
    @test tbl.cols[2][1] ≈ 87.103976 atol = tol
end