From 31721cb41ba24701e33d288581328bbe87b76506 Mon Sep 17 00:00:00 2001 From: Oskar Laverny Date: Thu, 4 Apr 2024 14:00:19 +0200 Subject: [PATCH] fix usage of RateTables.jl --- src/PoharPerme.jl | 3 ++- src/nonparamfit.jl | 11 ++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/PoharPerme.jl b/src/PoharPerme.jl index 892bb73..5bc8721 100644 --- a/src/PoharPerme.jl +++ b/src/PoharPerme.jl @@ -17,8 +17,9 @@ function _Λ(T, Δ, age, year, sex, ratetable, grid) Tᵢ = searchsortedlast(grid, T[i]) # index of the time of event (death or censored) in the grid wₚ = 1.0 sΛₚ = 0.0 + rtᵢ = ratetable[sex[i]] # other predictors for this individual have to go here. for j in 1:Tᵢ - λₚ = λ(ratetable, age[i] + grid[j], year[i] + grid[j], sex[i]) + λₚ = daily_hazard(rtᵢ, age[i] + grid[j], year[i] + grid[j]) Λₚ = λₚ * (grid[j+1]-grid[j]) # λₚ * ∂t sΛₚ += Λₚ wₚ = exp(sΛₚ) diff --git a/src/nonparamfit.jl b/src/nonparamfit.jl index 662c9b5..ca133f5 100644 --- a/src/nonparamfit.jl +++ b/src/nonparamfit.jl @@ -28,10 +28,11 @@ T = modelcols(t.T, d) return hcat(T,Δ) ############ <<<<<---- end -function StatsBase.fit(::Type{E}, formula::FormulaTerm, df::DataFrame, rt::RateTableV2) where {E<:NonparametricEstimator} +function StatsBase.fit(::Type{E}, formula::FormulaTerm, df::DataFrame, rt::RateTables.AbstractRateTable) where {E<:NonparametricEstimator} column_names = names(df) - expected_columns = String.(keys(rt.axes)) + rate_predictors =RateTables.predictors(rt) + expected_columns = String.(rate_predictors) missing_columns = filter(name -> !(name in column_names), expected_columns) if !isempty(missing_columns) throw(ArgumentError("Missing columns in data: $missing_columns")) @@ -40,9 +41,9 @@ function StatsBase.fit(::Type{E}, formula::FormulaTerm, df::DataFrame, rt::RateT formula = apply_schema(formula,schema(df)) pred_names = StatsModels.termvars(formula) new_df = groupby(df, pred_names) - pred_names = String.(pred_names) + pred_names = String.(pred_names) # <<<---- is 
this really needed ? Typing the same variable two different times in a function makes things really slow (causes type instability). - g = term(1) ~ foldl(+,term.(keys(rt.axes))) + g = term(1) ~ term(:age) + term(:year) + foldl(+,term.(rate_predictors)) # age and year are not in the predictors anymore, but this is on purpose. g = apply_schema(g,schema(df)) pred_g = modelcols(g.rhs, df) @@ -53,7 +54,7 @@ function StatsBase.fit(::Type{E}, formula::FormulaTerm, df::DataFrame, rt::RateT if nrow(unique(df[!,pred_names])) == 0 resp = modelcols(formula.lhs, df) pred_g = modelcols(g.rhs, df) - push!(pp,PoharPerme(resp[:,1], resp[:,2], pred_g[:,1], pred_g[:,2], temp, rt)) + push!(pp,PoharPerme(resp[:,1], resp[:,2], pred_g[:,1], pred_g[:,2], temp, rt)) # <<<- Here, what if there are more predictors for the rate table ? this is not generic enough. else for i in 1:nrow(unique(df[!,pred_names])) resp = modelcols(formula.lhs, new_df[i])