Skip to content

Commit

Permalink
fix usage of RateTables.jl
Browse files Browse the repository at this point in the history
  • Loading branch information
lrnv committed Apr 4, 2024
1 parent 7d17891 commit 31721cb
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
3 changes: 2 additions & 1 deletion src/PoharPerme.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ function _Λ(T, Δ, age, year, sex, ratetable, grid)
Tᵢ = searchsortedlast(grid, T[i]) # index of the time of event (death or censored) in the grid
wₚ = 1.0
sΛₚ = 0.0
rtᵢ = ratetable[sex[i]] # other predictors for this individual have to go here.
for j in 1:Tᵢ
λₚ = λ(ratetable, age[i] + grid[j], year[i] + grid[j], sex[i])
λₚ = daily_hazard(rtᵢ, age[i] + grid[j], year[i] + grid[j])
Λₚ = λₚ * (grid[j+1]-grid[j]) # λₚ * ∂t
sΛₚ += Λₚ
wₚ = exp(sΛₚ)
Expand Down
11 changes: 6 additions & 5 deletions src/nonparamfit.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,11 @@ T = modelcols(t.T, d)
return hcat(T,Δ) ############ <<<<<----
end

function StatsBase.fit(::Type{E}, formula::FormulaTerm, df::DataFrame, rt::RateTableV2) where {E<:NonparametricEstimator}
function StatsBase.fit(::Type{E}, formula::FormulaTerm, df::DataFrame, rt::RateTables.AbstractRateTable) where {E<:NonparametricEstimator}
column_names = names(df)
expected_columns = String.(keys(rt.axes))
rate_predictors =RateTables.predictors(rt)

expected_columns = String.(rate_predictors)
missing_columns = filter(name -> !(name in column_names), expected_columns)
if !isempty(missing_columns)
throw(ArgumentError("Missing columns in data: $missing_columns"))
Expand All @@ -40,9 +41,9 @@ function StatsBase.fit(::Type{E}, formula::FormulaTerm, df::DataFrame, rt::RateT
formula = apply_schema(formula,schema(df))
pred_names = StatsModels.termvars(formula)
new_df = groupby(df, pred_names)
pred_names = String.(pred_names)
pred_names = String.(pred_names) # <<<---- Is this really needed? Rebinding the same variable to two different types within a function causes type instability, which makes things really slow.

g = term(1) ~ foldl(+,term.(keys(rt.axes)))
g = term(1) ~ term(:age) + term(:year) + foldl(+,term.(rate_predictors)) # age and year are not in the predictors anymore, but this is on purpose.
g = apply_schema(g,schema(df))
pred_g = modelcols(g.rhs, df)

Expand All @@ -53,7 +54,7 @@ function StatsBase.fit(::Type{E}, formula::FormulaTerm, df::DataFrame, rt::RateT
if nrow(unique(df[!,pred_names])) == 0
resp = modelcols(formula.lhs, df)
pred_g = modelcols(g.rhs, df)
push!(pp,PoharPerme(resp[:,1], resp[:,2], pred_g[:,1], pred_g[:,2], temp, rt))
push!(pp,PoharPerme(resp[:,1], resp[:,2], pred_g[:,1], pred_g[:,2], temp, rt)) # <<<- Here, what if there are more predictors for the rate table? This is not generic enough.
else
for i in 1:nrow(unique(df[!,pred_names]))
resp = modelcols(formula.lhs, new_df[i])
Expand Down

0 comments on commit 31721cb

Please sign in to comment.