diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml index c8dc5fef..7522071d 100644 --- a/.github/workflows/CompatHelper.yml +++ b/.github/workflows/CompatHelper.yml @@ -1,7 +1,7 @@ name: CompatHelper on: schedule: - - cron: '0 0 * * 1' + - cron: '0 0 1 * *' workflow_dispatch: jobs: CompatHelper: diff --git a/.github/workflows/Tier1.yml b/.github/workflows/Tier1.yml index b56fc642..d9481f6b 100644 --- a/.github/workflows/Tier1.yml +++ b/.github/workflows/Tier1.yml @@ -31,8 +31,8 @@ jobs: matrix: version: - '1.6' - - '1.7' - '1.8' + - '1' os: - ubuntu-latest - macOS-latest diff --git a/Project.toml b/Project.toml index 7f7f7dfc..c91d01e5 100644 --- a/Project.toml +++ b/Project.toml @@ -3,7 +3,7 @@ uuid = "a1dec852-9fe5-11e9-361f-8d9fde67cfa2" keywords = ["lenearmodel", "mixedmodel"] desc = "Mixed-effects models with flexible covariance structure." authors = ["Vladimir Arnautov "] -version = "0.14.9" +version = "0.15.0" [deps] DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" @@ -20,13 +20,13 @@ StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" [compat] DiffResults = "1" -Distributions = "0.20, 0.21, 0.22, 0.23, 0.24, 0.25" +Distributions = "0.21, 0.22, 0.23, 0.24, 0.25" ForwardDiff = "0.10" LineSearches = "7" MetidaBase = "0.11, 0.12" Optim = "1" ProgressMeter = "1" -StatsBase = "0.29, 0.30, 0.31, 0.32, 0.33, 0.34" +StatsBase = "0.30, 0.31, 0.32, 0.33, 0.34" StatsModels = "0.7" julia = "1" diff --git a/docs/src/custom.md b/docs/src/custom.md index 1c44700e..2dcfac16 100644 --- a/docs/src/custom.md +++ b/docs/src/custom.md @@ -91,7 +91,7 @@ Example: using Metida, DataFrames, CSV, CategoricalArrays ftdf = CSV.File(joinpath(dirname(pathof(Metida)), "..", "test", "csv", "1fptime.csv"); types = [String, String, Float64, Float64]) |> DataFrame -df0 = CSV.File(joinpath(dirname(pathof(Metida)), "..", "test", "csv", "df0.csv"); types = [String, String, String, String,Float64, Float64]) |> DataFrame +df0 = CSV.File(joinpath(dirname(pathof(Metida)), "..", "test", "csv", "df0.csv"); types = [String, String, String, String,Float64, Float64, Float64]) |> DataFrame struct CustomCovarianceStructure <: Metida.AbstractCovarianceType end function Metida.covstrparam(ct::CustomCovarianceStructure, t::Int)::Tuple{Int, Int} diff --git a/docs/src/details.md b/docs/src/details.md index 4ecea801..4fceee5c 100644 --- a/docs/src/details.md +++ b/docs/src/details.md @@ -33,7 +33,7 @@ logREML(\theta,\beta) = -\frac{N-p}{2} - \frac{1}{2}\sum_{i=1}^nlog|V_{\theta, i -\frac{1}{2}log|\sum_{i=1}^nX_i'V_{\theta, i}^{-1}X_i|-\frac{1}{2}\sum_{i=1}^n(y_i - X_{i}\beta)'V_{\theta, i}^{-1}(y_i - X_{i}\beta) ``` -Actually ```L(\theta) = -2logREML = L_1(\theta) + L_2(\theta) + L_3(\theta) + c`` used for optimization, where: +Actually ```L(\theta) = -2logREML = L_1(\theta) + L_2(\theta) + L_3(\theta) + c``` used for optimization, where: ```math L_1(\theta) = \frac{1}{2}\sum_{i=1}^nlog|V_{i}| \\ @@ -51,6 +51,27 @@ L_3(\theta) = \frac{1}{2}\sum_{i=1}^n(y_i - X_{i}\beta)'V_i^{-1}(y_i - X_{i}\bet \mathcal{H}\mathcal{L}(\theta) = \mathcal{H}L_1(\theta) + \mathcal{H}L_2(\theta) + \mathcal{H} L_3(\theta) ``` +#### Weights + +If weights defined: + +```math +V_{i} = Z_{i} G Z_i'+ W^{- \frac{1}{2}}_i R_{i} W^{- \frac{1}{2}}_i +``` + + +where ```W``` - diagonal matrix of weights. + + +#### Multiple random and repeated effects + +If model include multiple effects ( with n random and m repeated effects) final V will be: + +```math +V_{i} = Z_{i, 1} G_{1} Z_{i, 1}' + ... + Z_{i, n} G_{1} Z_{i, n}'+ W^{- \frac{1}{2}}_i ( R_{i, 1} + ... + R_{i, m}) W^{- \frac{1}{2}}_i +``` + + ##### Initial step Initial (first) step before optimization may be done: diff --git a/docs/src/instanduse.md b/docs/src/instanduse.md index db11b892..09faa541 100644 --- a/docs/src/instanduse.md +++ b/docs/src/instanduse.md @@ -63,8 +63,8 @@ Define `random` and `repreated` effects with [`Metida.VarEffect`](@ref) using [` right side is a effect itself. [`Metida.HeterogeneousCompoundSymmetry`](@ref) and [`Metida.Diag`](@ref) (Diagonal) in example bellow is a model of variance-covariance structure. See also [`Metida.@lmmformula`](@ref) macro. !!! note - In some cases levels of repeated effect should not be equal inside each level of subject or model will not have any sense. For example, it is assumed that usually CSH or UN (Unstructured) using with levels of repeated effect is different inside each level of subject. - Metida does not check this! + + In some cases levels of repeated effect should not be equal inside each level of subject or model will not have any sense. For example, it is assumed that usually CSH or UN (Unstructured) using with levels of repeated effect is different inside each level of subject. Metida does not check this! ```@example lmmexample @@ -73,6 +73,15 @@ random = VarEffect(@covstr(formulation|subject), CSH), repeated = VarEffect(@covstr(formulation|subject), DIAG)); ``` +Also [`Metida.@lmmformula`](@ref) macro can be used: + +```julia +lmm = LMM(@lmmformula(var~sequence+period+formulation, + random = formulation|subject:CSH, + repeated = formulation|subject:DIAG), + df) +``` + #### Step 3: Fit Just fit the model. @@ -87,6 +96,23 @@ fit!(lmm) lmm.log ``` +#### Confidence intervals for coefficients + + +```@example lmmexample +confint(lmm) +``` + +!!! note + + Satterthwaite approximation for the denominator degrees of freedom used by default. + +#### StatsBsae API + +StatsBsae API implemented: [`Metida.islinear`](@ref), [`Metida.confint`](@ref), [`Metida.coef`](@ref), [`Metida.coefnames`](@ref), [`Metida.dof_residual`](@ref), [`Metida.dof`](@ref), [`Metida.loglikelihood`](@ref), [`Metida.aic`](@ref), [`Metida.bic`](@ref), [`Metida.aicc`](@ref), [`Metida.isfitted`](@ref), [`Metida.vcov`](@ref), [`Metida.stderror`](@ref), [`Metida.modelmatrix`](@ref), [`Metida.response`](@ref), [`Metida.crossmodelmatrix`](@ref), [`Metida.coeftable`](@ref), [`Metida.responsename`](@ref) + + + ##### Type III Tests of Fixed Effects !!! warning diff --git a/src/dof_contain.jl b/src/dof_contain.jl index feeccaab..d3f621ce 100644 --- a/src/dof_contain.jl +++ b/src/dof_contain.jl @@ -41,8 +41,8 @@ end Minimum returned. If no random effect found N - rank(XZ) returned. """ function dof_contain(lmm, i) - ind = lmm.mm.assign[i] - sym = StatsModels.termvars(lmm.mf.f.rhs.terms[ind]) + ind = lmm.modstr.assign[i] + sym = StatsModels.termvars(lmm.f.rhs.terms[ind]) rr = Vector{Int}(undef, 0) for r = 1:length(lmm.covstr.random) if length(intersect(sym, StatsModels.termvars(lmm.covstr.random[r].model))) > 0 @@ -57,12 +57,12 @@ function dof_contain(lmm, i) end function dof_contain(lmm) - dof = zeros(Int, length(lmm.mm.assign)) + dof = zeros(Int, length(lmm.modstr.assign)) rrt = zeros(Int, length(lmm.covstr.random)) rz = 0 - for i = 1:length(lmm.mm.assign) - ind = lmm.mm.assign[i] - sym = StatsModels.termvars(lmm.mf.f.rhs.terms[ind]) + for i = 1:length(lmm.modstr.assign) + ind = lmm.modstr.assign[i] + sym = StatsModels.termvars(lmm.f.rhs.terms[ind]) rr = Vector{Int}(undef, 0) for r = 1:length(lmm.covstr.random) if length(intersect(sym, StatsModels.termvars(lmm.covstr.random[r].model))) > 0 @@ -87,7 +87,7 @@ end """ function dof_contain_f(lmm, i) - sym = StatsModels.termvars(lmm.mf.f.rhs.terms[i]) + sym = StatsModels.termvars(lmm.f.rhs.terms[i]) rr = Vector{Int}(undef, 0) for r = 1:length(lmm.covstr.random) if length(intersect(sym, StatsModels.termvars(lmm.covstr.random[r].model))) > 0 diff --git a/src/fit.jl b/src/fit.jl index baa1118b..ec3b85b2 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -51,6 +51,7 @@ Fit LMM model. * `varlinkf` - :exp / :sq / :identity [ref](@ref varlink_header) * `rholinkf` - :sigm / :atan / :sqsigm / :psigm * `aifirst` - first iteration with AI-like method - :default / :ai / :score +* `aifmax` - maximum pre-optimization steps * `g_tol` - absolute tolerance in the gradient * `x_tol` - absolute tolerance of theta vector * `f_tol` - absolute tolerance in changes of the REML diff --git a/src/gmat.jl b/src/gmat.jl index 4e2786ce..c6346c95 100644 --- a/src/gmat.jl +++ b/src/gmat.jl @@ -89,11 +89,12 @@ end #CS function gmat!(mx, θ, ::CS_) s = size(mx, 1) - mx .= θ[1]^2 + θ₁² = θ[1]^2 + mx .= θ₁² if s > 1 - mxθ2 = θ[1]^2 * θ[2] + mxθ2 = θ₁² * θ[2] for n = 2:s - @inbounds @simd for m = 1:n-1 + @inbounds @simd for m = 1:n - 1 mx[m, n] = mxθ2 end end @@ -212,8 +213,8 @@ function gmat!(mx, θ, ::UN_) end if s > 1 for n = 2:s - @inbounds @simd for m = 1:n-1 - mx[m, n] = mx[m, m] * mx[n, n] * θ[s+tpnum(m, n, s)] + @inbounds @simd for m = 1:n - 1 + mx[m, n] = mx[m, m] * mx[n, n] * θ[s + tpnum(m, n, s)] end end end diff --git a/src/linearalgebra.jl b/src/linearalgebra.jl index 05f6963a..79d5093f 100644 --- a/src/linearalgebra.jl +++ b/src/linearalgebra.jl @@ -139,6 +139,20 @@ Change θ. end θ end +# Diagonal(b) * A * Diagonal(b) - chnage only A upper triangle +@noinline function mulβdαβd!(A::AbstractMatrix, b::AbstractVector) + q = size(A, 1) + p = size(A, 2) + if !(q == p == length(b)) throw(DimensionMismatch("size(A, 1) and size(A, 2) should be equal length(b)")) end + for n in 1:p + @simd for m in 1:n + @inbounds A[m, n] *= b[m] * b[n] + end + end + A +end + + ################################################################################ @inline function tmul_unsafe(rz, θ::AbstractVector{T}) where T vec = zeros(T, size(rz, 1)) diff --git a/src/lmm.jl b/src/lmm.jl index 2181f89a..cdeb5c7f 100644 --- a/src/lmm.jl +++ b/src/lmm.jl @@ -5,8 +5,13 @@ struct LMMLogMsg msg::String end + +struct ModelStructure + assign::Vector{Int64} +end + """ - LMM(model, data; contrasts=Dict{Symbol,Any}(), random::Union{Nothing, VarEffect, Vector{VarEffect}} = nothing, repeated::Union{Nothing, VarEffect} = nothing) + LMM(model, data; contrasts=Dict{Symbol,Any}(), random::Union{Nothing, VarEffect, Vector{VarEffect}} = nothing, repeated::Union{Nothing, VarEffect} = nothing, wts::Union{Nothing, AbstractVector, AbstractString, Symbol} = nothing) Make Linear-Mixed Model object. @@ -18,14 +23,16 @@ Make Linear-Mixed Model object. `random`: vector of random effects or single random effect -`repeated`: is a repeated effect (only one) +`repeated`: is a repeated effect or vector + +`wts`: regression weights (residuals). See also: [`@lmmformula`](@ref) """ -struct LMM{T<:AbstractFloat} <: MetidaModel +struct LMM{T <: AbstractFloat, W <: Union{LMMWts, Nothing}} <: MetidaModel model::FormulaTerm - mf::ModelFrame - mm::ModelMatrix + f::FormulaTerm + modstr::ModelStructure covstr::CovStructure data::LMMData{T} dv::LMMDataViews{T} @@ -33,11 +40,12 @@ struct LMM{T<:AbstractFloat} <: MetidaModel rankx::Int result::ModelResult maxvcbl::Int + wts::Union{Nothing, LMMWts} log::Vector{LMMLogMsg} function LMM(model::FormulaTerm, - mf::ModelFrame, - mm::ModelMatrix, + f::FormulaTerm, + modstr::ModelStructure, covstr::CovStructure, data::LMMData{T}, dv::LMMDataViews{T}, @@ -45,10 +53,11 @@ struct LMM{T<:AbstractFloat} <: MetidaModel rankx::Int, result::ModelResult, maxvcbl::Int, - log::Vector{LMMLogMsg}) where T - new{T}(model, mf, mm, covstr, data, dv, nfixed, rankx, result, maxvcbl, log) + wts::W, + log::Vector{LMMLogMsg}) where T where W <: Union{LMMWts, Nothing} + new{T, W}(model, f, modstr, covstr, data, dv, nfixed, rankx, result, maxvcbl, wts, log) end - function LMM(model, data; contrasts=Dict{Symbol,Any}(), random::Union{Nothing, VarEffect, Vector{VarEffect}} = nothing, repeated::Union{Nothing, VarEffect} = nothing) + function LMM(model, data; contrasts=Dict{Symbol,Any}(), random::Union{Nothing, VarEffect, Vector{VarEffect}} = nothing, repeated::Union{Nothing, VarEffect, Vector{VarEffect}} = nothing, wts::Union{Nothing, AbstractVector, AbstractString, Symbol} = nothing) #need check responce - Float if !Tables.istable(data) error("Data not a table!") end if repeated === nothing && random === nothing @@ -62,16 +71,25 @@ struct LMM{T<:AbstractFloat} <: MetidaModel if !isnothing(repeated) union!(tv, termvars(repeated)) end + if !isnothing(wts) && wts isa Union{AbstractString, Symbol} + if wts isa String wts = Symbol(wts) end + union!(tv, (wts,)) + end ct = Tables.columntable(data) if !(tv ⊆ keys(ct)) error("Some column(s) not found!") end data, data_ = StatsModels.missing_omit(NamedTuple{tuple(tv...)}(ct)) lmmlog = Vector{LMMLogMsg}(undef, 0) sch = schema(model, data, contrasts) f = apply_schema(model, sch, MetidaModel) - mf = ModelFrame(f, sch, data, MetidaModel) + + rmf, lmf = modelcols(f, data) + + assign = StatsModels.asgn(f) + + #mf = ModelFrame(f, sch, data, MetidaModel) #mf = ModelFrame(model, data; contrasts = contrasts) - mm = ModelMatrix(mf) - nfixed = nterms(mf) + #mm = ModelMatrix(mf) + nfixed = fixedeffn(f) if repeated === nothing repeated = NOREPEAT end @@ -80,11 +98,16 @@ struct LMM{T<:AbstractFloat} <: MetidaModel random = VarEffect(Metida.@covstr(0|1), Metida.RZero()) end if !isa(random, Vector) random = [random] end - - if repeated.covtype.s == :SI && !isa(repeated.model, ConstantTerm) - lmmlog!(lmmlog, 1, LMMLogMsg(:WARN, "Repeated effect not a constant, but covariance type is SI. ")) + if !isa(repeated, Vector) repeated = [repeated] end + for r in repeated + if r.covtype.s == :SI && !isa(r.model, ConstantTerm) + lmmlog!(lmmlog, 1, LMMLogMsg(:WARN, "Repeated effect not a constant, but covariance type is SI. ")) + end end - lmmdata = LMMData(modelmatrix(mf), response(mf)) + #rmf = response(mf) + if !(eltype(rmf) <: AbstractFloat) @warn "Response variable not <: AbstractFloat" end + lmmdata = LMMData(lmf, rmf) + covstr = CovStructure(random, repeated, data) coefn = size(lmmdata.xv, 2) rankx = rank(lmmdata.xv) @@ -92,11 +115,28 @@ struct LMM{T<:AbstractFloat} <: MetidaModel @warn "Fixed-effect matrix not full-rank!" lmmlog!(lmmlog, 1, LMMLogMsg(:WARN, "Fixed-effect matrix not full-rank!")) end + + if isnothing(wts) + lmmwts = nothing + else + if wts isa Symbol + wts = Tables.getcolumn(data, wts) + end + if length(lmmdata.yv) == length(wts) + if any(x -> x <= zero(x), wts) error("Only cases with positive weights allowed!") end + lmmwts = LMMWts(wts, covstr.vcovblock) + else + @warn "wts count not equal observations count! wts not used." + lmmwts = nothing + end + end + mres = ModelResult(false, nothing, fill(NaN, covstr.tl), NaN, fill(NaN, coefn), nothing, fill(NaN, coefn, coefn), fill(NaN, coefn), nothing, false) - LMM(model, mf, mm, covstr, lmmdata, LMMDataViews(lmmdata.xv, lmmdata.yv, covstr.vcovblock), nfixed, rankx, mres, findmax(length, covstr.vcovblock)[1], lmmlog) + + LMM(model, f, ModelStructure(assign), covstr, lmmdata, LMMDataViews(lmmdata.xv, lmmdata.yv, covstr.vcovblock), nfixed, rankx, mres, findmax(length, covstr.vcovblock)[1], lmmwts, lmmlog) end - function LMM(f::LMMformula, data; contrasts=Dict{Symbol,Any}(), kwargs...) - LMM(f.formula, data; contrasts=contrasts, random = f.random, repeated = f.repeated) + function LMM(f::LMMformula, data; kwargs...) + LMM(f.formula, data; random = f.random, repeated = f.repeated, kwargs...) end end @@ -145,6 +185,10 @@ end function maxblocksize(mm::MetidaModel) mm.maxvcbl end +function assign(lmm::LMM) + lmm.modstr.assign +end + ################################################################################ function lmmlog!(io, lmmlog::Vector{LMMLogMsg}, verbose, vmsg) if verbose == 1 @@ -187,6 +231,7 @@ end function Base.show(io::IO, lmm::LMM) println(io, "Linear Mixed Model: ", lmm.model) + rn = lmm.covstr.rn for i = 1:length(lmm.covstr.random) println(io, "Random $i: ") if !lmm.covstr.random[i].covtype.z @@ -198,11 +243,13 @@ function Base.show(io::IO, lmm::LMM) end println(io, "Repeated: ") - if lmm.covstr.repeated.formula == NOREPEAT.formula + if lmm.covstr.repeated[1].formula == NOREPEAT.formula println(io," Residual only") else - println(io, " Model: $(lmm.covstr.repeated.model === nothing ? "nothing" : string(lmm.covstr.repeated.model, "|", lmm.covstr.repeated.subj))") - println(io, " Type: $(lmm.covstr.repeated.covtype.s) ($(lmm.covstr.t[end]))") + for i = 1:length(lmm.covstr.repeated) + println(io, " Model: $(lmm.covstr.repeated[i].model === nothing ? "nothing" : string(lmm.covstr.repeated[i].model, "|", lmm.covstr.repeated[i].subj))") + println(io, " Type: $(lmm.covstr.repeated[i].covtype.s) ($(lmm.covstr.t[rn+i]))") + end end println(io, "Blocks: $(nblocks(lmm)), Maximum block size: $(maxblocksize(lmm))") @@ -234,7 +281,14 @@ function Base.show(io::IO, lmm::LMM) view(mx, lmm.covstr.tr[i], 1) .= "Random $i" end end - view(mx, lmm.covstr.tr[end], 1) .= "Residual" + if length(lmm.covstr.repeated) == 1 + view(mx, lmm.covstr.tr[end], 1) .= "Residual" + else + for i = 1:length(lmm.covstr.repeated) + view(mx, lmm.covstr.tr[lmm.covstr.rn + i], 1) .= "Residual $i" + end + + end for i = 1:lmm.covstr.tl if mx[i, 3] == :var mx[i, 4] = round.(mx[i, 4]^2, sigdigits = 6) end end diff --git a/src/lmmdata.jl b/src/lmmdata.jl index b83aca34..bf07fa7b 100644 --- a/src/lmmdata.jl +++ b/src/lmmdata.jl @@ -1,6 +1,6 @@ #lmmdata.jl -struct LMMData{T} +struct LMMData{T<:AbstractFloat} # Fixed effect matrix xv::Matrix{Float64} # Responce vector @@ -13,7 +13,7 @@ struct LMMData{T} end end -struct LMMDataViews{T} <: AbstractLMMDataBlocks +struct LMMDataViews{T<:AbstractFloat} <: AbstractLMMDataBlocks # Fixed effect matrix views xv::Vector{Matrix{Float64}} # Responce vector views @@ -34,3 +34,17 @@ struct LMMDataViews{T} <: AbstractLMMDataBlocks return LMMDataViews(lmm.data.xv, lmm.data.yv, lmm.covstr.vcovblock) end end + +struct LMMWts{T<:AbstractFloat} + sqrtwts::Vector{Vector{T}} + function LMMWts(sqrtwts::Vector{Vector{T}}) where T + new{T}(sqrtwts) + end + function LMMWts(wts::Vector{T}, vcovblock) where T + sqrtwts = Vector{Vector{T}}(undef, length(vcovblock)) + for i in eachindex(vcovblock) + sqrtwts[i] = @. inv(sqrt($(view(wts, vcovblock[i])))) + end + LMMWts(sqrtwts) + end +end \ No newline at end of file diff --git a/src/miboot.jl b/src/miboot.jl index 806d3e9d..e5075ed7 100644 --- a/src/miboot.jl +++ b/src/miboot.jl @@ -19,20 +19,26 @@ Multiple imputation model. """ struct MILMM{T} <: MetidaModel lmm::LMM{T} - mf::ModelFrame - mm::ModelMatrix + f::FormulaTerm + modstr::ModelStructure covstr::CovStructure data::LMMData{T} dv::LMMDataViews{T} maxvcbl::Int mrs::MRS + wts::Union{Nothing, LMMWts} log::Vector{LMMLogMsg} - function MILMM(lmm::LMM{T}, data) where T + function MILMM(lmm::LMM{T}, data; wts::Union{Nothing, AbstractVector, AbstractString, Symbol} = nothing) where T if !Tables.istable(data) error("Data not a table!") end if !isfitted(lmm) error("LMM should be fitted!") end tv = termvars(lmm.model.rhs) union!(tv, termvars(lmm.covstr.random)) union!(tv, termvars(lmm.covstr.repeated)) + if !isnothing(wts) && wts isa Union{AbstractString, Symbol} + if wts isa String wts = Symbol(wts) end + union!(tv, (wts,)) + end + datam, data_ = StatsModels.missing_omit(NamedTuple{tuple(tv...)}(Tables.columntable(data))) rv = termvars(lmm.model.lhs)[1] rcol = Tables.getcolumn(data, rv)[data_] @@ -42,15 +48,35 @@ struct MILMM{T} <: MetidaModel replace!(rcol, missing => NaN) data = merge(NamedTuple{(rv,)}((convert(Vector{Float64}, rcol),)), datam) lmmlog = Vector{LMMLogMsg}(undef, 0) - mf = ModelFrame(lmm.mf.f, lmm.mf.schema, data, MetidaModel) - mm = ModelMatrix(mf) - mmf = mm.m - lmmdata = LMMData(mmf, data[rv]) + rmf, lmf = modelcols(lmm.f, data) + + #mf = ModelFrame(lmm.f, lmm.mf.schema, data, MetidaModel) + #mm = ModelMatrix(mf) + #mmf = mm.m + + lmmdata = LMMData(lmf, data[rv]) covstr = CovStructure(lmm.covstr.random, lmm.covstr.repeated, data) - dv = LMMDataViews(mmf, lmmdata.yv, covstr.vcovblock) + dv = LMMDataViews(lmf, lmmdata.yv, covstr.vcovblock) mb = missblocks(dv.yv) - dist = mrsdist(lmm, mb, covstr, dv.xv, dv.yv) - new{T}(lmm, mf, mm, covstr, lmmdata, dv, findmax(length, covstr.vcovblock)[1], MRS(mb, dist), lmmlog) + + if isnothing(wts) + lmmwts = nothing + else + if wts isa Symbol + wts = Tables.getcolumn(data, wts) + end + if length(lmmdata.yv) == length(wts) + lmmwts = LMMWts(wts, covstr.vcovblock) + else + @warn "wts count not equal observations count! wts not used." + lmmwts = nothing + end + end + + + dist = mrsdist(lmm, mb, covstr, lmmwts, dv.xv, dv.yv) + + new{T}(lmm, lmm.f, lmm.modstr, covstr, lmmdata, dv, findmax(length, covstr.vcovblock)[1], MRS(mb, dist), lmmwts, lmmlog) end end struct MILMMResult{T} @@ -225,7 +251,7 @@ function bootstrap_(lmm::LMM{T}; n, verbose, init, rng, del) where T mres = ModelResult(false, nothing, fill(NaN, thetalength(lmm)), NaN, fill(NaN, coefn(lmm)), nothing, fill(NaN, coefn(lmm), coefn(lmm)), fill(NaN, coefn(lmm)), nothing, false) - lmmb = LMM(lmm.model, lmm.mf, lmm.mm, lmm.covstr, lmm.data, LMMDataViews(lmm.dv.xv, deepcopy(lmm.dv.yv)), lmm.nfixed, lmm.rankx, mres, lmm.maxvcbl, Vector{LMMLogMsg}(undef, 0)) + lmmb = LMM(lmm.model, lmm.f, lmm.modstr, lmm.covstr, lmm.data, LMMDataViews(lmm.dv.xv, deepcopy(lmm.dv.yv)), lmm.nfixed, lmm.rankx, mres, lmm.maxvcbl, lmm.wts, Vector{LMMLogMsg}(undef, 0)) vi = findall(x-> x == :var, lmm.covstr.ct) tlmm = theta_(lmm) .^ 2 @@ -289,7 +315,7 @@ function dbootstrap_(lmm::LMM{T}; n, verbose, init, rng, del) where T log = Vector{LMMLogMsg}(undef, 0) mres = ModelResult(false, nothing, fill(NaN, thetalength(lmm)), NaN, fill(NaN, coefn(lmm)), nothing, fill(NaN, coefn(lmm), coefn(lmm)), fill(NaN, coefn(lmm)), nothing, false) - lmmb = LMM(lmm.model, lmm.mf, lmm.mm, lmm.covstr, lmm.data, LMMDataViews(lmm.dv.xv, deepcopy(lmm.dv.yv)), lmm.nfixed, lmm.rankx, mres, lmm.maxvcbl, Vector{LMMLogMsg}(undef, 0)) + lmmb = LMM(lmm.model, lmm.f, lmm.modstr, lmm.covstr, lmm.data, LMMDataViews(lmm.dv.xv, deepcopy(lmm.dv.yv)), lmm.nfixed, lmm.rankx, mres, lmm.maxvcbl, lmm.wts, Vector{LMMLogMsg}(undef, 0)) vi = findall(x-> x == :var, lmm.covstr.ct) tlmm = theta_(lmm) .^ 2 @@ -432,7 +458,7 @@ function milmm(mi::MILMM; n = 100, verbose = true, rng = default_rng()) ty = Vector{Float64}(undef, max) for i = 1:n data, dv = generate_mi(rng, mi.data, mi.dv, mi.covstr.vcovblock, mi.mrs, rb, ty) - lmmi = LMM(mi.lmm.model, mi.mf, mi.mm, mi.covstr, data, dv, mi.lmm.nfixed, mi.lmm.rankx, deepcopy(mi.lmm.result), mi.maxvcbl, mi.log) + lmmi = LMM(mi.lmm.model, mi.f, mi.modstr, mi.covstr, data, dv, mi.lmm.nfixed, mi.lmm.rankx, deepcopy(mi.lmm.result), mi.maxvcbl, mi.wts, mi.log) lmm[i] = lmmi end p = Progress(n, dt = 0.5, @@ -507,11 +533,11 @@ function missblocks(yv) vec end # return distribution vector for -function mrsdist(lmm, mb, covstr, xv, yv) +function mrsdist(lmm, mb, covstr, lmmwts, xv, yv) dist = Vector{FullNormal}(undef, length(mb)) #Base.Threads.@threads for i in 1:length(mb) - v = vmatrix(lmm.result.theta, covstr, mb[i][1]) + v = vmatrix(lmm.result.theta, covstr, lmmwts, mb[i][1]) rv = covmatreorder(v, mb[i][2]) dist[i] = mvconddist(rv[1], rv[2], mb[i][2], lmm.result.beta, xv[mb[i][1]], yv[mb[i][1]]) end diff --git a/src/random.jl b/src/random.jl index baf011de..d4c5311c 100644 --- a/src/random.jl +++ b/src/random.jl @@ -37,11 +37,11 @@ function rand!(rng::AbstractRNG, v::AbstractVector, lmm::LMM{T}, theta::Abstract if length(v) != nobs(lmm) error("Wrong v length!") end tv = Vector{T}(undef, lmm.maxvcbl) gvec = gmatvec(theta, lmm.covstr) - rtheta = theta[lmm.covstr.tr[end]] + rtheta = lmm.covstr.tr[lmm.covstr.rn + 1:end] for i = 1:n q = length(lmm.covstr.vcovblock[i]) V = zeros(q, q) - vmatrix!(V, gvec, rtheta, lmm, i) + vmatrix!(V, gvec, theta, rtheta, lmm, i) if length(tv) != q resize!(tv, q) end Distributions.rand!(rng, MvNormal(Symmetric(V)), tv) v[lmm.covstr.vcovblock[i]] .= tv @@ -67,7 +67,7 @@ function rand!(rng::AbstractRNG, v::AbstractVector, lmm::LMM{T}, theta::Abstract tv = Vector{T}(undef, lmm.maxvcbl) m = Vector{T}(undef, lmm.maxvcbl) gvec = gmatvec(theta, lmm.covstr) - rtheta = theta[lmm.covstr.tr[end]] + rtheta = lmm.covstr.tr[lmm.covstr.rn + 1:end] for i = 1:n q = length(lmm.covstr.vcovblock[i]) if length(tv) != q @@ -76,7 +76,7 @@ function rand!(rng::AbstractRNG, v::AbstractVector, lmm::LMM{T}, theta::Abstract end mul!(m, lmm.dv.xv[i], beta) V = zeros(q, q) - vmatrix!(V, gvec, rtheta, lmm, i) + vmatrix!(V, gvec, theta, rtheta, lmm, i) Distributions.rand!(rng, MvNormal(m, Symmetric(V)), tv) v[lmm.covstr.vcovblock[i]] .= tv end diff --git a/src/reml.jl b/src/reml.jl index 445add8d..5b8500c9 100644 --- a/src/reml.jl +++ b/src/reml.jl @@ -44,7 +44,8 @@ function reml_sweep_β(lmm, data, θ::Vector{T}; maxthreads::Int = 16) where T # β = Vector{T}(undef, p) #--------------------------------------------------------------------------- gvec = gmatvec(θ, lmm.covstr) - rθ = θ[lmm.covstr.tr[end]] # R part of θ + rθ = lmm.covstr.tr[lmm.covstr.rn + 1:end] # ranges of R part of θ + #rθ = (θ[t] for t in lmm.covstr.tr) # Repeated vector noerror = true ncore = min(num_cores(), n, maxthreads) accθ₁ = zeros(T, ncore) @@ -72,7 +73,7 @@ function reml_sweep_β(lmm, data, θ::Vector{T}; maxthreads::Int = 16) where T # copyto!(Vx, data.xv[i]) #------------------------------------------------------------------- # Make V matrix - vmatrix!(V, gvec, rθ, lmm, i) + vmatrix!(V, gvec, θ, rθ, lmm, i) # Repeated vector #----------------------------------------------------------------------- if length(swtw[t]) != qswm resize!(swtw[t], qswm) end swm, swr, ne = sweepb!(swtw[t], Vp, 1:q; logdet = true) @@ -123,7 +124,8 @@ function reml_sweep_β(lmm, data, θ::Vector{T}; maxthreads::Int = 16) where T # logdetθ₂ = logdet(rθs₂) =# β .= NaN - return Inf, β, Inf, Inf, false + θ₂ .= NaN + return Inf, β, θs₂, Inf, false end # θ₃ #@inbounds @simd for i = 1:n @@ -144,7 +146,7 @@ function reml_sweep_β_nlopt(lmm, data, θ::Vector{T}; maxthreads::Int = 16) whe A = Vector{Matrix{T}}(undef, n) logdetθ₂ = zero(T) gvec = gmatvec(θ, lmm.covstr) - rθ = θ[lmm.covstr.tr[end]] # R part of θ + rθ = lmm.covstr.tr[lmm.covstr.rn + 1:end] # ranges of R part of θ noerror = true ncore = min(num_cores(), n, maxthreads) accθ₁ = zeros(T, ncore) @@ -160,7 +162,7 @@ function reml_sweep_β_nlopt(lmm, data, θ::Vector{T}; maxthreads::Int = 16) whe i = offset + j q = length(lmm.covstr.vcovblock[i]) V = zeros(T, q, q) - vmatrix!(V, gvec, rθ, lmm, i) + vmatrix!(V, gvec, θ, rθ, lmm, i) #------------------------------------------------------------------- # Cholesky Ai, info = LinearAlgebra.LAPACK.potrf!('U', V) @@ -183,7 +185,8 @@ function reml_sweep_β_nlopt(lmm, data, θ::Vector{T}; maxthreads::Int = 16) whe noerror = all(erroracc) if !noerror β = fill(NaN, lmm.rankx) - return Inf, β, Inf, Inf, false + θ₂ .= NaN + return Inf, β, θ₂, Inf, false end θ₁ = sum(accθ₁) θ₂tc = sum(accθ₂) @@ -193,7 +196,8 @@ function reml_sweep_β_nlopt(lmm, data, θ::Vector{T}; maxthreads::Int = 16) whe ldθ₂, info = LinearAlgebra.LAPACK.potrf!('U', θ₂tc) if info != 0 β = fill(NaN, lmm.rankx) - return Inf, β, Inf, Inf, false + θ₂ .= NaN + return Inf, β, θ₂, Inf, false end LinearAlgebra.LAPACK.potrs!('U', θ₂tc, βtc) β = βtc @@ -218,7 +222,7 @@ function core_sweep_β(lmm, data, θ::Vector{T}, β, n; maxthreads::Int = 16) wh accθ₃ = zeros(T, ncore) erroracc = trues(ncore) gvec = gmatvec(θ, lmm.covstr) - rθ = θ[lmm.covstr.tr[end]] # R part of θ + rθ = lmm.covstr.tr[lmm.covstr.rn + 1:end] # ranges of R part of θ d, r = divrem(n, ncore) Base.Threads.@threads for t = 1:ncore offset = min(t-1, r) + (t-1)*d @@ -232,7 +236,7 @@ function core_sweep_β(lmm, data, θ::Vector{T}, β, n; maxthreads::Int = 16) wh Vx = view(Vp, 1:q, q+1:qswm) Vc = view(Vp, q+1:qswm, q+1:qswm) copyto!(Vx, data.xv[i]) - vmatrix!(V, gvec, rθ, lmm, i) + vmatrix!(V, gvec, θ, rθ, lmm, i) #----------------------------------------------------------------------- swm, swr, ne = sweepb!(Vector{T}(undef, qswm), Vp, 1:q; logdet = true) #----------------------------------------------------------------------- diff --git a/src/reml_grad.jl b/src/reml_grad.jl index f2e008c2..b1ea4e7b 100644 --- a/src/reml_grad.jl +++ b/src/reml_grad.jl @@ -2,6 +2,7 @@ ################################################################################ # Grads +#= """ trmulαβ(A::AbstractMatrix{T}, B::AbstractMatrix) where T @@ -230,3 +231,4 @@ function rmat_g!(mx, θ, rz, g::Int, ::DIAG_) end mx end +=# \ No newline at end of file diff --git a/src/rmat.jl b/src/rmat.jl index 1966305a..15b01543 100644 --- a/src/rmat.jl +++ b/src/rmat.jl @@ -3,13 +3,15 @@ ################################################################################ ################################################################################ -@noinline function rmat_base_inc!(mx, θ, covstr, bi) - en = covstr.rn + 1 +@noinline function rmat_base_inc!(mx, θ, rθ, covstr, bi) block = covstr.vcovblock[bi] - zblock = view(covstr.rz, block, :) - @simd for i = 1:subjn(covstr, en, bi) - sb = getsubj(covstr, en, bi, i) - rmat!(view(mx, sb, sb), θ, view(zblock, sb, :), covstr.repeated.covtype.s) + for j = 1:covstr.rpn + en = covstr.rn + j + zblock = view(covstr.rz[j], block, :) + @simd for i = 1:subjn(covstr, en, bi) + sb = getsubj(covstr, en, bi, i) + rmat!(view(mx, sb, sb), view(θ, rθ[j]), view(zblock, sb, :), covstr.repeated[j].covtype.s) + end end mx end diff --git a/src/statsbase.jl b/src/statsbase.jl index 76936d2d..888be62f 100644 --- a/src/statsbase.jl +++ b/src/statsbase.jl @@ -82,7 +82,7 @@ end Coefficients names. """ -StatsBase.coefnames(lmm::LMM) = StatsBase.coefnames(lmm.mf) +StatsBase.coefnames(lmm::LMM) = StatsBase.coefnames(lmm.f)[2] """ StatsBase.nobs(lmm::MetiaModel) @@ -223,6 +223,7 @@ function StatsBase.coeftable(lmm::LMM) z = co ./ se pvalue = ccdf.(Chisq(1), abs2.(z)) names = coefnames(lmm) + if !isa(names, AbstractVector) names = [names] end return CoefTable( hcat(co, se, z, pvalue), ["Coef.", "Std. Error", "z", "Pr(>|z|)"], @@ -238,8 +239,7 @@ end Responce varible name. """ function StatsBase.responsename(lmm::LMM) - cnm = coefnames(lmm.mf.f.lhs) - return isa(cnm, Vector{String}) ? first(cnm) : cnm + StatsBase.coefnames(lmm.f)[1] end diff --git a/src/statsmodels.jl b/src/statsmodels.jl index d5b32a20..4595f8ef 100644 --- a/src/statsmodels.jl +++ b/src/statsmodels.jl @@ -1,2 +1,2 @@ -StatsModels.formula(lmm::LMM) = lmm.mf.f \ No newline at end of file +StatsModels.formula(lmm::LMM) = lmm.f \ No newline at end of file diff --git a/src/typeiii.jl b/src/typeiii.jl index 895b836a..7128330b 100644 --- a/src/typeiii.jl +++ b/src/typeiii.jl @@ -16,7 +16,7 @@ Type III table. """ function typeiii(lmm::LMM; ddf::Symbol = :satter) if !isfitted(lmm) error("Model not fitted!") end - c = length(lmm.mf.f.rhs.terms) + c = length(lmm.f.rhs.terms) d = Vector{Int}(undef, 0) fac = Vector{String}(undef, c) F = Vector{Float64}(undef,c) @@ -24,7 +24,7 @@ function typeiii(lmm::LMM; ddf::Symbol = :satter) ndf = Vector{Float64}(undef, c) pval = Vector{Float64}(undef, c) for i = 1:c - iterm = lmm.mf.f.rhs.terms[i] + iterm = lmm.f.rhs.terms[i] if typeof(iterm) <: InterceptTerm{false} push!(d, i) diff --git a/src/utils.jl b/src/utils.jl index c76694a6..64ca46e1 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -10,12 +10,16 @@ function initvar(y::Vector, X::Matrix{T}) where T dot(r, r)/(length(r) - size(X, 2)), β end ################################################################################ -function nterms(lmm::LMM) - nterms(lmm.mf) +function fixedeffn(f::FormulaTerm) + length(f.rhs.terms) - !StatsModels.hasintercept(f) end +function fixedeffn(lmm::LMM) + fixedeffn(lmm.f) +end +#= function nterms(mf::ModelFrame) mf.schema.schema.count -end +=# function nterms(rhs::Union{Tuple{Vararg{AbstractTerm}}, Nothing, AbstractTerm}) if isa(rhs, Term) p = 1 @@ -26,7 +30,6 @@ function nterms(rhs::Union{Tuple{Vararg{AbstractTerm}}, Nothing, AbstractTerm}) end p end - """ Rerm name. """ @@ -50,16 +53,16 @@ end L-contrast matrix for `i` fixed effect. """ function lcontrast(lmm::LMM, i::Int) - n = length(lmm.mf.f.rhs.terms) + n = length(lmm.f.rhs.terms) p = size(lmm.data.xv, 2) if i > n || n < 1 error("Factor number out of range 1-$(n)") end - inds = findall(x -> x==i, lmm.mm.assign) - if typeof(lmm.mf.f.rhs.terms[i]) <: CategoricalTerm - mxc = zeros(size(lmm.mf.f.rhs.terms[i].contrasts.matrix, 1), p) + inds = findall(x -> x==i, assign(lmm)) + if typeof(lmm.f.rhs.terms[i]) <: CategoricalTerm + mxc = zeros(size(lmm.f.rhs.terms[i].contrasts.matrix, 1), p) mxcv = view(mxc, :, inds) - mxcv .= lmm.mf.f.rhs.terms[i].contrasts.matrix - mx = zeros(size(lmm.mf.f.rhs.terms[i].contrasts.matrix, 1) - 1, p) - for i = 2:size(lmm.mf.f.rhs.terms[i].contrasts.matrix, 1) + mxcv .= lmm.f.rhs.terms[i].contrasts.matrix + mx = zeros(size(lmm.f.rhs.terms[i].contrasts.matrix, 1) - 1, p) + for i = 2:size(lmm.f.rhs.terms[i].contrasts.matrix, 1) mx[i-1, :] .= mxc[i, :] - mxc[1, :] end else @@ -258,9 +261,25 @@ function rmatrix(lmm::LMM{T}, i::Int) where T if i > length(lmm.covstr.vcovblock) error("Invalid block number: $(i)!") end q = length(lmm.covstr.vcovblock[i]) R = zeros(T, q, q) - rmat_base_inc!(R, lmm.result.theta[lmm.covstr.tr[end]], lmm.covstr, i) + rθ = lmm.covstr.tr[lmm.covstr.rn + 1:end] + rmat_base_inc!(R, lmm.result.theta, rθ, lmm.covstr, i) Symmetric(R) end + +##################################################################### + +function applywts!(::Any, ::Int, ::Nothing) + nothing +end + +function applywts!(V::AbstractMatrix, i::Int, wts::LMMWts) + mulβdαβd!(V, wts.sqrtwts[i]) +end + + +##################################################################### + +##################################################################### """ vmatrix!(V, θ, lmm, i) @@ -268,14 +287,18 @@ Update variance-covariance matrix V for i bolock. Upper triangular updated. """ function vmatrix!(V, θ, lmm::LMM, i::Int) # pub API gvec = gmatvec(θ, lmm.covstr) + rθ = lmm.covstr.tr[lmm.covstr.rn + 1:end] + rmat_base_inc!(V, θ, rθ, lmm.covstr, i) # Repeated vector + applywts!(V, i, lmm.wts) zgz_base_inc!(V, gvec, lmm.covstr, i) - rmat_base_inc!(V, θ[lmm.covstr.tr[end]], lmm.covstr, i) + end # !!! Main function REML used -function vmatrix!(V, G, rθ, lmm::LMM, i::Int) +@noinline function vmatrix!(V, G, θ, rθ, lmm::LMM, i::Int) + rmat_base_inc!(V, θ, rθ, lmm.covstr, i) # Repeated vector + applywts!(V, i, lmm.wts) zgz_base_inc!(V, G, lmm.covstr, i) - rmat_base_inc!(V, rθ, lmm.covstr, i) end """ @@ -290,15 +313,18 @@ end function vmatrix(θ::AbstractVector{T}, lmm::LMM, i::Int) where T V = zeros(T, length(lmm.covstr.vcovblock[i]), length(lmm.covstr.vcovblock[i])) gvec = gmatvec(θ, lmm.covstr) - vmatrix!(V, gvec, θ[lmm.covstr.tr[end]], lmm, i) + rθ = lmm.covstr.tr[lmm.covstr.rn + 1:end] + vmatrix!(V, gvec, θ, rθ, lmm, i) # Repeated vector Symmetric(V) end # For Multiple Imputation -function vmatrix(θ::Vector, covstr::CovStructure, i::Int) +function vmatrix(θ::Vector, covstr::CovStructure, lmmwts, i::Int) V = zeros(length(covstr.vcovblock[i]), length(covstr.vcovblock[i])) gvec = gmatvec(θ, covstr) + rθ = covstr.tr[covstr.rn + 1:end] + rmat_base_inc!(V, θ, rθ, covstr, i) # Repeated vector + applywts!(V, i, lmmwts) zgz_base_inc!(V, gvec, covstr, i) - rmat_base_inc!(V, θ[covstr.tr[end]], covstr, i) Symmetric(V) end @@ -428,3 +454,10 @@ function StatsModels.termvars(ve::Vector{VarEffect}) end ################################################################################ +#= +asgn(f::FormulaTerm) = asgn(f.rhs) +asgn(t) = mapreduce(((i,t), ) -> i*ones(StatsModels.width(t)), + append!, + enumerate(StatsModels.vectorize(t)), + init=Int[]) +=# \ No newline at end of file diff --git a/src/varstruct.jl b/src/varstruct.jl index 297c1bb9..d87dd76d 100644 --- a/src/varstruct.jl +++ b/src/varstruct.jl @@ -85,8 +85,14 @@ end # COVARIANCE STRUCTURE ################################################################################ function sabjcrossdicts(d1, d2) - if length(d1) == 1 return d1 end - if length(d2) == 1 return d2 end + + if length(d1) == 1 + return d1 + elseif length(d2) == 1 + return d2 + elseif length(d1) == 0 + return d2 + end d2 = copy(d2) d1 = copy(d1) i = 0 @@ -152,13 +158,19 @@ struct CovStructure{T} <: AbstractCovarianceStructure # Random effects random::Vector{VarEffect} # Repearted effects - repeated::VarEffect + repeated::Vector{VarEffect} + # schema schema::Vector{Union{Tuple, AbstractTerm}} + # names rcnames::Vector{String} # blocks for vcov matrix / variance blocking factor (subject) vcovblock::Vector{Vector{Int}} - # number of random effect + # number of random effect rn::Int + # number coef. of random effect in θ vector + rtn::Int + # number of repeated effect + rpn::Int # Z matrix z::Matrix{T} #subjz::Vector{BitArray{2}} @@ -168,7 +180,7 @@ struct CovStructure{T} <: AbstractCovarianceStructure # unit range z column range for each random effect zrndur::Vector{UnitRange{Int}} # repeated effect parametrization matrix - rz::Matrix{T} + rz::Vector{Matrix{T}} # size 2 of z/rz matrix q::Vector{Int} # total number of parameters in each effect @@ -187,7 +199,7 @@ struct CovStructure{T} <: AbstractCovarianceStructure maxn::Int #-- function CovStructure(random, repeated, data) - alleffl = length(random) + 1 + alleffl = length(random) + length(repeated) rown = length(Tables.rows(data)) # q = Vector{Int}(undef, alleffl) @@ -198,11 +210,15 @@ struct CovStructure{T} <: AbstractCovarianceStructure #subjz = Vector{BitMatrix}(undef, alleffl) dicts = Vector{Dict}(undef, alleffl) # - zrndur = Vector{UnitRange{Int}}(undef, alleffl - 1) - # Z Matrix for repeated effect - rz = Matrix{Float64}(undef, rown, 0) + zrndur = Vector{UnitRange{Int}}(undef, length(random)) # Number of random effects rn = length(random) + # + rtn = 0 + # Number of repeated effects + rpn = length(repeated) + # Z Matrix for repeated effect + rz = Vector{Matrix{Float64}}(undef, rpn) # #Theta parameter type ct = Vector{Symbol}(undef, 0) @@ -243,34 +259,39 @@ struct CovStructure{T} <: AbstractCovarianceStructure sn[i] = length(dicts[i]) updatenametype!(ct, rcnames, csp, schema[i], random[i].covtype.s) append!(emap, fill(i, t[i])) + rtn += t[i] end # REPEATED EFFECTS - if length(repeated.coding) == 0 - fill_coding_dict!(repeated.model, repeated.coding, data) - end + for i = 1:length(repeated) - schema[end] = apply_schema(repeated.model, StatsModels.schema(data, repeated.coding)) - rz = modelcols(MatrixTerm(schema[end]), data) - symbs = StatsModels.termvars(repeated.subj) - if length(symbs) > 0 - cdata = tabcols(data, symbs) # Tuple(Tables.getcolumn(Tables.columns(data), x) for x in symbs) - dicts[end] = Dict{Tuple{eltype.(cdata)...}, Vector{Int}}() - indsdict!(dicts[end], cdata) - else - dicts[end] = Dict(1 => collect(1:rown)) #changed to range - end + if length(repeated[i].coding) == 0 + fill_coding_dict!(repeated[i].model, repeated[i].coding, data) + end + + schema[rn+i] = apply_schema(repeated[i].model, StatsModels.schema(data, repeated[i].coding)) + rz[i] = modelcols(MatrixTerm(schema[rn+i]), data) + symbs = StatsModels.termvars(repeated[i].subj) + if length(symbs) > 0 + cdata = tabcols(data, symbs) # Tuple(Tables.getcolumn(Tables.columns(data), x) for x in symbs) + dicts[rn+i] = Dict{Tuple{eltype.(cdata)...}, Vector{Int}}() + indsdict!(dicts[rn+i], cdata) + else + dicts[rn+i] = Dict(1 => collect(1:rown)) #changed to range + end - sn[end] = length(dicts[end]) - q[end] = size(rz, 2) - csp = covstrparam(repeated.covtype.s, q[end]) - t[end] = sum(csp) - tr[end] = UnitRange(sum(t[1:end-1]) + 1, sum(t[1:end-1]) + t[end]) - updatenametype!(ct, rcnames, csp, schema[end], repeated.covtype.s) + sn[rn+i] = length(dicts[rn+i]) + q[rn+i] = size(rz[i], 2) + csp = covstrparam(repeated[i].covtype.s, q[rn+i]) + t[rn+i] = sum(csp) + tr[rn+i] = UnitRange(sum(t[1:rn+i-1]) + 1, sum(t[1:rn+i-1]) + t[rn+i]) + updatenametype!(ct, rcnames, csp, schema[rn+i], repeated[i].covtype.s) + + # emap + append!(emap, fill(rn+i, t[rn+i])) + end # Theta length tl = sum(t) - # emap - append!(emap, fill(rn+1, t[end])) ######################################################################## #if any(x-> 1 in keys(x), dicts[1:end-1]) # blocks = [first(dicts)[1]] @@ -282,16 +303,27 @@ struct CovStructure{T} <: AbstractCovarianceStructure subjblockdict = sabjcrossdicts(subjblockdict, dicts[i]) end end - if !(isa(repeated.covtype.s, SI_) || isa(repeated.covtype.s, DIAG_)) # if repeated effect have non-diagonal structure - subjblockdict = sabjcrossdicts(subjblockdict, dicts[end]) + else + subjblockdict = nothing + end + repn = Int[] + for i = 1:length(repeated) + if isnothing(subjblockdict) + subjblockdict = dicts[rn+i] + elseif !(isa(repeated[i].covtype.s, SI_) || isa(repeated[i].covtype.s, DIAG_)) # if repeated effect have non-diagonal structure + subjblockdict = sabjcrossdicts(subjblockdict, dicts[rn+i]) # make dict for non SI DIAG repeated effects else - dicts[end] = subjblockdict + push!(repn, i) # just collect ind of SI DIAG repeated effects end - else - subjblockdict = dicts[end] end + for i in repn # make SI DIAG repeated effects dict - subjblockdict + dicts[rn+i] = subjblockdict + end + + blocks = collect(values(subjblockdict)) #end + sblock = Matrix{Vector{Tuple{Vector{Int}, Int}}}(undef, length(blocks), alleffl) nblock = [] ####################################################################### @@ -319,7 +351,7 @@ struct CovStructure{T} <: AbstractCovarianceStructure end esb = EffectSubjectBlock(sblock, nblock) ####################################################################### - new{eltype(z)}(random, repeated, schema, rcnames, blocks, rn, z, esb, zrndur, rz, q, t, tr, tl, ct, emap, sn, maxn) + new{eltype(z)}(random, repeated, schema, rcnames, blocks, rn, rtn, rpn, z, esb, zrndur, rz, q, t, tr, tl, ct, emap, sn, maxn) end end ############################################################################### @@ -434,8 +466,12 @@ function Base.show(io::IO, cs::CovStructure) for i = 1:length(cs.random) println(io, "Random $(i):", cs.random[i]) end - println(io, "Repeated: ", cs.repeated) + for i = 1:length(cs.repeated) + println(io, "Repeated $(i): ", cs.repeated[i]) + end println(io, "Random effect range in complex Z: ", cs.zrndur) + println(io, "Random coef. in θ: ", cs.rtn) + println(io, "Range of each parameters in θ vector: ", cs.tr) println(io, "Size of Z: ", cs.q) println(io, "Parameter number for each effect: ", cs.t) println(io, "Theta length:", cs.tl) diff --git a/src/vartypes.jl b/src/vartypes.jl index ba30346a..1b4707b4 100644 --- a/src/vartypes.jl +++ b/src/vartypes.jl @@ -37,11 +37,11 @@ struct ZERO <: AbstractCovarianceType end Make covariance type with AbstractCovmatMethod. """ -struct CovarianceType - s::AbstractCovarianceType +struct CovarianceType{T <: AbstractCovarianceType} + s::T z::Bool - function CovarianceType(s::AbstractCovarianceType, z::Bool) - new(s, z) + function CovarianceType(s::T, z::Bool) where T <: AbstractCovarianceType + new{T}(s, z) end function CovarianceType(s::AbstractCovarianceType) CovarianceType(s, true) diff --git a/test/csv/df0.csv b/test/csv/df0.csv index 23e46183..08133e91 100644 --- a/test/csv/df0.csv +++ b/test/csv/df0.csv @@ -1,21 +1,21 @@ -subject,sequence,period,formulation,var,var2 -1,1,1,1,1.0,1.0 -1,1,2,2,1.1,2.0 -1,1,3,1,1.2,3.0 -1,1,4,2,1.3,4.0 -2,1,1,1,2.0,5.0 -2,1,2,2,2.1,6.0 -2,1,3,1,2.4,7.0 -2,1,4,2,2.2,2.0 -3,2,1,2,1.3,3.5 -3,2,2,1,1.5,3.0 -3,2,3,2,1.6,4.0 -3,2,4,1,1.4,5.0 -4,2,1,2,1.5,6.0 -4,2,2,1,1.7,1.2 -4,2,3,2,1.3,3.4 -4,2,4,1,1.4,5.0 -5,2,1,2,1.5,6.0 -5,2,2,1,1.7,7.0 -5,2,3,2,1.2,10.0 -5,2,4,1,1.8,9.0 +"subject","sequence","period","formulation","var","var2","wts" +1,1,1,1,1,1,1 +1,1,2,2,1.1,2,1 +1,1,3,1,1.2,3,1 +1,1,4,2,1.3,4,0.5 +2,1,1,1,2,5,1 +2,1,2,2,2.1,6,1 +2,1,3,1,2.4,7,0.25 +2,1,4,2,2.2,2,0.25 +3,2,1,2,1.3,3.5,1 +3,2,2,1,1.5,3,0.3 +3,2,3,2,1.6,4,0.3 +3,2,4,1,1.4,5,0.3 +4,2,1,2,1.5,6,0.1 +4,2,2,1,1.7,1.2,0.2 +4,2,3,2,1.3,3.4,0.3 +4,2,4,1,1.4,5,0.4 +5,2,1,2,1.5,6,1 +5,2,2,1,1.7,7,2 +5,2,3,2,1.2,10,3 +5,2,4,1,1.8,9,4 diff --git a/test/test.jl b/test/test.jl index 68c31308..54bc17cb 100644 --- a/test/test.jl +++ b/test/test.jl @@ -6,6 +6,7 @@ path = dirname(@__FILE__) include("testdata.jl") @testset " Publick API basic tests " begin + io = IOBuffer(); transform!(df0, :formulation => categorical, renamecols=false) # Basic, no block df0.nosubj = ones(size(df0, 1)) @@ -13,7 +14,9 @@ include("testdata.jl") random = Metida.VarEffect(Metida.@covstr(formulation|nosubj), Metida.DIAG), ) Metida.fit!(lmm) - @test Metida.m2logreml(lmm) ≈ 25.129480634331067 atol=1E-6 + @test Metida.m2logreml(lmm, ) ≈ 25.129480634331067 atol=1E-6 + # Test -2 reml for provided theta + @test Metida.m2logreml(lmm, Metida.theta(lmm)) ≈ 25.129480634331067 atol=1E-6 # Casuistic case - random lmm = Metida.LMM(@formula(var~sequence+period+formulation), df0; @@ -49,14 +52,21 @@ include("testdata.jl") ) @test Metida.m2logreml(lmm) ≈ 16.241112644506067 atol=1E-6 + lmm = Metida.fit(Metida.LMM, Metida.@lmmformula(var~0+sequence+period+formulation, + random = formulation|subject:Metida.DIAG), df0) + @test Metida.fixedeffn(lmm) == 3 + t3table = Metida.typeiii(lmm) + @test length(t3table.name) == 3 + lmm = Metida.fit(Metida.LMM, Metida.@lmmformula(var~sequence+period+formulation, random = formulation|subject:Metida.DIAG), df0) @test Metida.m2logreml(lmm) ≈ 16.241112644506067 atol=1E-6 + @test Metida.fixedeffn(lmm) == 4 t3table = Metida.typeiii(lmm; ddf = :contain) # NOT VALIDATED t3table = Metida.typeiii(lmm; ddf = :residual) t3table = Metida.typeiii(lmm) - + @test length(t3table.name) == 4 ############################################################################ ############################################################################ # API test @@ -93,7 +103,7 @@ include("testdata.jl") @test isa(response(lmm), Vector) @test sum(Metida.hessian(lmm)) ≈ 1118.160713481362 atol=1E-2 @test Metida.nblocks(lmm) == 5 - @test length(coefnames(lmm)) == 6 + @test coefnames(lmm) == ["(Intercept)", "sequence: 2", "period: 2", "period: 3", "period: 4", "formulation: 2"] @test Metida.gmatrixipd(lmm) @test Metida.confint(lmm)[end][1] ≈ -0.7630380758015894 atol=1E-4 @test Metida.confint(lmm, 6)[1] ≈ -0.7630380758015894 atol=1E-4 @@ -109,8 +119,13 @@ include("testdata.jl") est = Metida.estimate(lmm; level = 0.9) @test_nowarn formula(lmm) - - + + # + onefelmm = Metida.LMM(@formula(var~1), df0; + random = Metida.VarEffect(Metida.@covstr(formulation|subject), Metida.DIAG), + ) + @test coefnames(onefelmm) == "(Intercept)" + @test_nowarn show(io, onefelmm) ############################################################################ # AI like algo Metida.fit!(lmm; aifirst = true, init = Metida.theta(lmm)) @@ -178,6 +193,29 @@ include("testdata.jl") random = 1+var^2|subject:Metida.SI), df0) Metida.fit!(lmmint) @test Metida.m2logreml(lmmint) ≈ 84.23373276096902 atol=1E-6 + + # Wts + + df0.wtsc = fill(0.5, size(df0, 1)) + lmm = Metida.LMM(@formula(var~sequence+period+formulation), df0; + random = Metida.VarEffect(Metida.@covstr(formulation|subject), Metida.DIAG), + wts = df0.wtsc) + fit!(lmm) + @test Metida.m2logreml(lmm) ≈ 16.241112644506067 atol=1E-6 + + lmm = Metida.LMM(@formula(var~sequence+period+formulation), df0; + random = Metida.VarEffect(Metida.@covstr(formulation|subject), Metida.DIAG), + wts = "wts") + fit!(lmm) + @test Metida.m2logreml(lmm) ≈ 17.823729 atol=1E-6 # TEST WITH SPSS 28 + + # Repeated vector + + lmm = Metida.LMM(@formula(var~sequence+period+formulation), df0; + repeated = [Metida.VarEffect(Metida.@covstr(formulation|subject), Metida.DIAG), Metida.VarEffect(Metida.@covstr(1|subject), Metida.SI)]) + fit!(lmm) + @test_nowarn show(io, lmm) + end ################################################################################ # df0 @@ -462,7 +500,7 @@ end random = Metida.VarEffect(Metida.@covstr(1 + r2 * r1|subject), Metida.DIAG; coding=Dict(:r1 => DummyCoding(), :r2 => DummyCoding())) ) Metida.fit!(lmm) - @test Metida.theta(lmm) ≈ [2.796694409004289, 2.900485570555582, 3.354913215348968, 2.0436114769223237, 1.8477830405766895, 2.0436115732330955, 1.0131934233937254] atol=1E-6 # atol=1E-8 ! + @test Metida.theta(lmm) ≈ [2.796694409004289, 2.900485570555582, 3.354913215348968, 2.0436114769223237, 1.8477830405766895, 2.0436115732330955, 1.0131934233937254] atol=1E-5 # atol=1E-8 ! @test Metida.m2logreml(lmm) ≈ 713.0655862252027 atol=1E-8 end @testset " Model: &, DIAG/SI " begin @@ -470,7 +508,7 @@ end random = Metida.VarEffect(Metida.@covstr(r1&r2|subject), Metida.DIAG), ) Metida.fit!(lmm) - @test Metida.theta(lmm) ≈ [3.0325005960015985, 3.343826588448401, 1.8477830405766895, 1.8477830405766895, 1.8477830405766895, 4.462942536844632, 1.0082345219318216] atol=1E-6 # atol=1E-8 ! + @test Metida.theta(lmm) ≈ [3.0325005960015985, 3.343826588448401, 1.8477830405766895, 1.8477830405766895, 1.8477830405766895, 4.462942536844632, 1.0082345219318216] atol=1E-5 # atol=1E-8 ! @test Metida.m2logreml(lmm) ≈ 719.9413776641368 atol=1E-8 end @testset " Model: INT, +, TOEPHP(3)/SI " begin @@ -636,6 +674,8 @@ end repeated = Metida.VarEffect(Metida.@covstr(period|subject), CustomCovarianceStructure()), ) Metida.fit!(lmm) + io = IOBuffer(); + @test_nowarn show(io, lmm) @test Metida.m2logreml(lmm) ≈ 8.740095378772942 atol=1E-8 end @@ -701,7 +741,7 @@ end @test_nowarn Base.show(io, lmm.data) @test_nowarn Base.show(io, lmm.result) @test_nowarn Base.show(io, lmm.covstr) - @test_nowarn Base.show(io, lmm.covstr.repeated.covtype) + @test_nowarn Base.show(io, lmm.covstr.repeated[1].covtype) @test_nowarn Base.show(io, Metida.getlog(lmm)) t3table = Metida.typeiii(lmm) diff --git a/test/testdata.jl b/test/testdata.jl index 47675174..9532ab19 100644 --- a/test/testdata.jl +++ b/test/testdata.jl @@ -1,7 +1,7 @@ # Metida #Simple dataset -df0 = CSV.File(path*"/csv/df0.csv"; types = [String, String, String, String, Float64, Float64]) |> DataFrame +df0 = CSV.File(path*"/csv/df0.csv"; types = [String, String, String, String, Float64, Float64, Float64]) |> DataFrame df0m = CSV.File(path*"/csv/df0miss.csv"; types = [String, String, String, String, Float64, Float64]) |> DataFrame