JuliaStats · PharmCat · Nov 4, 2022 · Nov 5, 2022 · Nov 5, 2022 · Nov 6, 2022
diff --git a/data/rds1.csv b/data/rds1.csv
@@ -0,0 +1,37 @@
+Subject	Sequence	Period	Formulation	Var
+3	TR	1	T	225.95
+1	RT	1	R	181.09
+2	RT	1	R	114.48
+4	RT	1	R	176.91
+5	TR	1	T	147.01
+6	TR	1	T	97.53
+7	RT	1	R	146.60
+8	TR	1	T	45.58
+9	RT	1	R	109.20
+10	RT	1	R	125.61
+11	TR	1	T	92.26
+12	RT	1	R	237.95
+13	TR	1	T	145.46
+14	TR	1	T	179.96
+15	TR	1	T	173.86
+16	RT	1	R	144.00
+17	RT	1	R	185.10
+18	TR	1	T	117.99
+1	RT	2	T	210.14
+2	RT	2	T	98.72
+3	TR	2	R	241.09
+4	RT	2	T	186.65
+5	TR	2	R	139.56
+6	TR	2	R	124.77
+7	RT	2	T	137.62
+8	TR	2	R	57.71
+9	RT	2	T	139.36
+10	RT	2	T	120.43
+11	TR	2	R	116.10
+12	RT	2	T	228.63
+13	TR	2	R	165.09
+14	TR	2	R	181.09
+15	TR	2	R	206.66
+16	RT	2	T	143.25
+17	RT	2	T	192.22
+18	TR	2	R	125.50
diff --git a/src/ftest.jl b/src/ftest.jl
@@ -228,3 +228,189 @@ function show(io::IO, ftr::FTestResult{N}) where N
     end
     print(io, '─'^totwidth)
 end
+
+
+##############################################
+# Tests of Between-Subjects Effects
+# Based on F-statistics
+# L: an s×p full-row-rank matrix whose rows are estimable functions;
+# s ≥ 1 and p is the number of coefs
+"""
+    mulαβαtinc!(θ::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix)
+
+Add `A * B * A'` to `θ` in place and return `θ`.
+
+Only the upper triangle of `θ` (entries `θ[m, n]` with `m ≤ n`) is changed; the strictly
+lower triangle is left as it was, so wrap the result in `Symmetric` before using it as a
+full matrix. `B` is expected to be symmetric.
+
+Throws a `DimensionMismatch` if `B` is not `p × p` or `θ` is not `s × s`, where
+`(s, p) = size(A)`.
+"""
+function mulαβαtinc!(θ::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix)
+    Base.require_one_based_indexing(θ, A, B)
+    sa, p = size(A)
+    size(B) == (p, p) || throw(DimensionMismatch(
+        "B has size $(size(B)), expected ($p, $p) to match the columns of A"))
+    size(θ) == (sa, sa) || throw(DimensionMismatch(
+        "θ has size $(size(θ)), expected ($sa, $sa) to match the rows of A"))
+    # All sizes were validated above, so the unchecked indexing below stays in bounds.
+    for j ∈ 1:p
+        for i ∈ 1:p
+            @inbounds Bij = B[i, j]
+            for n ∈ 1:sa
+                @inbounds BijAnj = Bij * A[n, j]
+                # The innermost loop walks down column `n` of θ (column-major order)
+                # and stops at the diagonal: only the upper triangle is accumulated.
+                @simd for m ∈ 1:n
+                    @inbounds θ[m, n] += A[m, i] * BijAnj
+                end
+            end
+        end
+    end
+    return θ
+end
+
+# See SPSS (GLM/UNIANOVA) and SAS (PROC GLM) documentation 
+# https://www.ibm.com/docs/en/spss-statistics/29.0.0?topic=effects-tests-between-subjects
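+# L is an s×p matrix corresponding to the plan-matrix columns of a factor
+# p - number of columns - the number of coefs in the model
+# s - number of rows - one per tested contrast of this factor
+#     (for a categorical factor: the number of levels minus one, see the example below)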
+# For Example
+# If you have model matrix with Intercept and two factors A and B with 3 and 4 levels
+# with Dummy coding you will have:
+# 
+# I A2 A3 B2 B3 B4
+# 1 1  0  1  0  0
+# 1 1  0  1  0  0
+# 1 1  0  1  0  0
+# 1 1  0  1  0  0
+# 1 0  1  1  0  0
+# 1 0  1  0  1  0
+# 1 0  1  0  1  0
+# 1 0  1  0  1  0
+# 1 0  1  0  1  0
+# 1 0  0  0  0  1
+# 1 0  0  0  0  1
+# 1 0  0  0  0  1
+# 1 0  0  0  0  0
+# 1 0  0  0  0  0
+#
+# Then you will have the L matrix for the intercept:
+#
+# 1 0  0  0  0  0 
+#
+# For A:
+#
+# 0 1  0  0  0  0
+# 0 0  1  0  0  0
+#
+# For B:
+#
+# 0 0  0  1  0  0
+# 0 0  0  0  1  0
+# 0 0  0  0  0  1
+#
+"""
+    lcontrast(obj, i::Int)
+
+Return the L-contrast matrix for the `i`-th term on the right-hand side of the formula
+of the fitted model `obj`.
+
+The matrix has one column per model coefficient (`length(coef(obj))`).
+
+- For a `CategoricalTerm` the term's contrast matrix is placed in the coefficient columns
+  that belong to the term, and row `r - 1` of the result is row `r` of that matrix minus
+  its first row (`r = 2, 3, …`). This form is intended to also cover zero-intercept models.
+- For any other term the result has one row per coefficient of the term, with a single `1`
+  in the column of that coefficient.
+
+Throws an error if `i` is outside `1:n`, where `n` is the number of right-hand-side terms.
+"""
+function lcontrast(obj, i::Int)
+    rhsterms = obj.formula.rhs.terms
+    n        = length(rhsterms)
+    # Reject indices below 1 as well as above `n`.
+    (1 <= i <= n) || error("Factor number out of range 1-$(n)")
+    term = rhsterms[i]
+    p    = length(coef(obj)) # number of coefs
+    # Number of coefficient columns taken by the terms that precede `term`.
+    prev = 0
+    for j = 1:i-1
+        prev += width(rhsterms[j])
+    end
+    inds = prev+1:prev+width(term)
+    if term isa CategoricalTerm
+        cm   = term.contrasts.matrix
+        nlev = size(cm, 1)
+        mxc  = zeros(nlev, p)
+        view(mxc, :, inds) .= cm
+        mx   = zeros(nlev - 1, p)
+        for r = 2:nlev # correct for zero-intercept model
+            mx[r-1, :] .= view(mxc, r, :) .- view(mxc, 1, :)
+        end
+    else
+        mx = zeros(length(inds), p) # unknown correctness for zero-intercept model
+        for (r, col) in enumerate(inds)
+            mx[r, col] = 1
+        end
+    end
+    return mx
+end
+
+# Display name of a model term (used as a row label by `typeiii`).
+# A term with a `sym` field is named after it; any other term falls back to its
+# printed form instead of failing on the missing field.
+tname(t::AbstractTerm) = hasproperty(t, :sym) ? string(t.sym) : string(t)
+# An interaction is named by joining the names of its component terms with " & ".
+tname(t::InteractionTerm) = join(tname.(t.terms), " & ")
+tname(t::InterceptTerm) = "(Intercept)"
+
+"""
+    typeiii(obj)
+
+Calculate F-statistics for Tests of Between-Subjects Effects for every term on the
+right-hand side of the formula of the fitted model `obj`.
+
+For each term an L-contrast matrix `L` is built with `lcontrast` and the statistic
+
+    F = (L*β)' * pinv(L*V*L') * (L*β) / rank(L)
+
+is computed, where `β = coef(obj)` and `V = vcov(obj)`. `NaN` entries of `β` and `V` are
+treated as zero, and the columns of `L` that belong to zero coefficients are zeroed, which
+reduces the rank of `L`. The p-value is the upper tail probability of an F distribution
+with `(rank(L), dof_residual(obj))` degrees of freedom; it is `NaN` when `rank(L)` is zero.
+
+Returns a `CoefTable` with the columns `"DF/DDF"`, `"F"` and `"Pr(>F)"` and one row per
+term. A dropped intercept (`InterceptTerm{false}`) gets no row.
+Sum of squares and MS not calculated.
+"""
+function typeiii(obj)
+    # Work on a copy so that the matrix returned by `vcov` is never modified.
+    V = copy(vcov(obj))
+    replace!(V, NaN => 0) # Some values can be NaN - replace them with zero
+    # NaN coefs are zeroed as well: their columns of L are zeroed below, but 0 * NaN
+    # would still turn L * β into NaN.
+    β        = map(b -> isnan(b) ? zero(b) : b, coef(obj))
+    rhsterms = obj.formula.rhs.terms
+    ddf      = dof_residual(obj)
+    fac      = String[]
+    F        = Float64[]
+    df       = Tuple{Float64, Float64}[]
+    pval     = Float64[]
+    for (i, term) in enumerate(rhsterms)
+        # If zero intercept (drop): no row is produced, so no L matrix is needed
+        term isa InterceptTerm{false} && continue
+        # Make L matrix
+        L = lcontrast(obj, i)
+        # For the case when a coef is zero (or was NaN) we reduce the rank of the L-matrix
+        for k in eachindex(β)
+            if iszero(β[k])
+                L[:, k] .= 0
+            end
+        end
+        RL = rank(L) # Rank of L matrix
+        # θ = L * V * L'; as V is symmetric only the upper triangle is computed
+        θ = zeros(size(L, 1), size(L, 1))
+        mulαβαtinc!(θ, L, V)
+        LB = L * β
+        # F = (LB' * pinv(Symmetric(θ)) * LB) / rank(L)
+        push!(fac, tname(term))
+        push!(F, dot(LB, pinv(Symmetric(θ)), LB) / RL)
+        push!(df, (RL, ddf))
+        ν1, ν2 = df[end]
+        push!(pval, iszero(ν1) ? NaN : ccdf(FDist(ν1, ν2), F[end]))
+    end
+    return CoefTable([df, F, pval], ["DF/DDF", "F", "Pr(>F)"], fac, 3, 2)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1837,3 +1837,60 @@ end
     # 3. 44 / wt == y
     @test GLM.loglik_obs(Binomial(), y, μ, wt, ϕ) ≈ GLM.logpdf(Binomial(Int(wt), μ), 44)
 end
+
+berds1 = CSV.read(joinpath(glm_datadir, "rds1.csv"), DataFrame)
+# Turn the Period and Subject columns into categorical columns.
+for col in (:Period, :Subject)
+    berds1[!, col] = categorical(berds1[!, col])
+end
+
+@testset "Tests of Between-Subjects Effects" begin
+    # This is not a BE model - no subject
+    # Reference values come from SPSS 28
+    #=
+    GLM Var BY Sequence Period Formulation 
+  /METHOD=SSTYPE(3) 
+  /INTERCEPT=INCLUDE 
+  /PRINT PARAMETER 
+  /CRITERIA=ALPHA(.05) 
+  /DESIGN=Sequence Period Formulation.
+    =#
+    # Intercept not included in test
+
+    # Expected F statistics and p-values for Sequence, Period and Formulation
+    Fref = (1.011001, 0.328551, 0.106973)
+    pref = (0.322206, 0.570520, 0.745747)
+
+    # Basic model: row 1 of the table is the intercept, rows 2-4 are the factors
+    tbl = GLM.typeiii(lm(@formula(Var ~ Sequence+Period+Formulation), berds1))
+    for k in 1:3
+        @test tbl.cols[2][k+1] ≈ Fref[k] atol = 1.0E-6
+    end
+    for k in 1:3
+        @test tbl.cols[3][k+1] ≈ pref[k] atol = 1.0E-6
+    end
+    #=
+    GLM Var BY Sequence Period Formulation 
+  /METHOD=SSTYPE(3) 
+  /INTERCEPT=EXCLUDE 
+  /PRINT PARAMETER 
+  /CRITERIA=ALPHA(.05) 
+  /DESIGN=Sequence Period Formulation.
+    =#
+
+    # Zero intercept: the factors are checked in rows 1-3
+    tbl = GLM.typeiii(lm(@formula(Var ~ 0+Sequence+Period+Formulation), berds1))
+    for k in 1:3
+        @test tbl.cols[2][k] ≈ Fref[k] atol = 1.0E-6
+    end
+    for k in 1:3
+        @test tbl.cols[3][k] ≈ pref[k] atol = 1.0E-6
+    end
+
+    # Crossed factors
+    tbl = GLM.typeiii(lm(@formula(Var ~ 1+Sequence&Period), berds1))
+    @test tbl.cols[2][2] ≈ 0.482175 atol = 1.0E-6
+    @test tbl.cols[3][2] ≈ 0.696996 atol = 1.0E-6
+
+    # Crossed factors (zero - intercept)
+    tbl = GLM.typeiii(lm(@formula(Var ~ 0+Sequence&Period), berds1))
+    @test tbl.cols[2][1] ≈ 87.103976 atol = 1.0E-6
+end