
Commit

Merge pull request #41 from gpitt71/main
Version 1.0.0
gpitt71 authored Nov 6, 2024
2 parents a957f7b + 045d63b commit 037ef93
Showing 6 changed files with 3 additions and 237 deletions.
3 changes: 1 addition & 2 deletions DESCRIPTION
@@ -1,7 +1,7 @@
Package: ReSurv
Type: Package
Title: Machine learning models for predicting IBNR claim counts
Version: 0.0.2
Version: 1.0.0
Authors@R:
c(person(given = "Emil",
family = "Hofman",
@@ -36,7 +36,6 @@ Imports:
tibble,
ggplot2,
survival,
LTRCtrees,
reshape2,
bshazard,
SynthETIC,
2 changes: 0 additions & 2 deletions NAMESPACE
@@ -19,7 +19,6 @@ export(ReSurvCV)
export(data_generator)
export(ooslkh)
export(survival_crps)
import(LTRCtrees)
import(SHAPforxgboost)
import(SynthETIC)
import(data.table)
@@ -31,7 +30,6 @@ import(rpart)
import(survival)
import(tidyverse)
import(xgboost)
importFrom(LTRCtrees,LTRCART)
importFrom(bshazard,bshazard)
importFrom(data.table,data.table)
importFrom(dplyr,"%>%")
59 changes: 0 additions & 59 deletions R/ReSurvIndividualData.R
@@ -68,7 +68,6 @@
#' @import tidyverse
#' @import xgboost
#' @import rpart
#' @import LTRCtrees
#' @import data.table
#' @importFrom dplyr reframe full_join
#' @importFrom tidyr replace_na
@@ -669,64 +668,6 @@ ReSurv.IndividualDataPP <- function(IndividualDataPP,

}

if(hazard_model == "LTRCtrees"){

X <- pkg.env$model.matrix.creator(data= IndividualDataPP$training.data,
select_columns = IndividualDataPP$categorical_features,
remove_first_dummy=T)

scaler <- pkg.env$scaler(continuous_features_scaling_method = continuous_features_scaling_method)

Xc <- IndividualDataPP$training.data %>%
reframe(across(all_of(IndividualDataPP$continuous_features),
scaler))

training_test_split = pkg.env$check.traintestsplit(percentage_data_training)

X=cbind(X,Xc)

Y=IndividualDataPP$training.data[,c("DP_rev_i", "I", "TR_i")]

control.pars <- do.call(rpart.control, hparameters)

model.out <- pkg.env$fit_LTRCtrees(data=IndividualDataPP$training.data,
formula_ct=formula_ct,
newdata=newdata,
control.pars)


bsln <- pkg.env$baseline.calc(hazard_model = hazard_model,
model.out = model.out$cox,
X=X,
Y=Y,
training_df=IndividualDataPP$training.data)


pred <- predict(model.out$cox,newdata)

benchmark_id <- 1
# pred_relative <- model.out$expg/model.out$expg[benchmark_id]

pred_relative <- exp(pred - pred[benchmark_id])

# exp(pred_relative)

hazard_frame <- cbind(newdata, expg=pred_relative)
# colnames(hazard_frame)[dim(hazard_frame)[2]]="expg"

bsln <- data.frame(baseline=bsln,
DP_rev_i=sort(as.integer(unique(IndividualDataPP$training.data$DP_rev_i))))



is_lkh <- pkg.env$evaluate_lkh_LTRCtrees(X_train=IndividualDataPP$training.data %>% select(c(IndividualDataPP$categorical_features,IndividualDataPP$continuous_features)),
Y_train=Y,
model=model.out)

os_lkh <- NULL

}

##################################################################################


27 changes: 0 additions & 27 deletions R/ResurvcvIndividualData.R
@@ -207,33 +207,6 @@ ReSurvCV.IndividualDataPP <- function(IndividualDataPP,

hparameters.f <- pkg.env$nn_hparameter_nodes_grid(hparameters.f, cv=T)

if(model == "LTRCtrees"){

formula_ct <- as.formula(IndividualDataPP$string_formula_i)

out.cv <- pkg.env$ltrcart_cv(IndividualDataPP=IndividualDataPP,
folds=folds,
formula_ct=formula_ct,
hparameters.f=hparameters.f,
verbose.cv=verbose.cv)

# Take the best result oos
out.best.oos <- out.cv %>%
filter(is_lkh==min(is_lkh)) %>%
as.data.frame()

# List the output of the cv and the best result OOS
out <- list(
out.cv = out.cv,
out.cv.best.oos = out.best.oos
)

class(out) <- c('ReSurvCV')

return(out)

}

train.lkh <- vector("numeric",
length=dim(hparameters.f)[1])

147 changes: 1 addition & 146 deletions R/helperfunctions.R
@@ -11,7 +11,6 @@
#' @import reticulate
#' @import xgboost
#' @importFrom rpart rpart.control
#' @importFrom LTRCtrees LTRCART
#' @import data.table
#' @importFrom dplyr reframe lag full_join rename
#' @importFrom tidyr replace_na
@@ -1129,19 +1128,6 @@ pkg.env$fit_cox_model <- function(data,
}


pkg.env$fit_LTRCtrees <- function(data,
formula_ct,
newdata,
control.pars){

LTRCART.fit <- LTRCART(formula_ct, data=data, control = control.pars)

# The following are relative risk predictions from LTRCtrees
LTRCART.pred <- predict(LTRCART.fit, newdata = newdata)

list(cox=LTRCART.fit,
expg = unname(LTRCART.pred))
}


pkg.env$fit_deep_surv <- function(data,
@@ -2359,13 +2345,6 @@ pkg.env$baseline.calc <- function(hazard_model,
predict_bsln <- predict(model.out,datads_pp$ds_train_m)
}

if(hazard_model == "LTRCtrees"){

predict_bsln <- log(predict(model.out, training_df %>%
arrange(DP_rev_i) %>%
as.data.frame()))

}

predict_bsln <- predict_bsln - predict_bsln[1] #make relative to initial value, same approach as cox
bsln <- pkg.env$baseline.efron(predict_bsln,
@@ -2703,53 +2682,6 @@ pkg.env$xgboost_cv <- function(IndividualDataPP,
}


pkg.env$ltrcart_cv <- function(IndividualDataPP,
folds,
formula_ct,
hparameters.f,
verbose.cv){

hparameters.f['xval']=folds
out <- data.frame()

for(hp in 1:dim(hparameters.f)[1]){

if(verbose.cv){cat(as.character(Sys.time()),
"Testing hyperparameters combination",
hp,
"out of",
dim(hparameters.f)[1], "\n")}

control.pars <- do.call(rpart.control, as.list.data.frame(hparameters.f[hp,]))

LTRCART.fit <- LTRCART(formula_ct, data=IndividualDataPP$training.data, control=control.pars)



tmp <- as.data.frame.matrix(LTRCART.fit$cptable)


Y=IndividualDataPP$training.data[,c("DP_rev_i", "I", "TR_i")]

model.out <- list()
model.out$cox <-LTRCART.fit

is_lkh <- pkg.env$evaluate_lkh_LTRCtrees(X_train=IndividualDataPP$training.data %>% select(c(IndividualDataPP$categorical_features,IndividualDataPP$continuous_features)),
Y_train=Y,
model=model.out)


tmp$is_lkh<- is_lkh$value
out <- rbind(out,tmp[which.min(tmp[,"xerror"]),])

}

out <- cbind(hparameters.f, out)

return(out)

}

# nn cv -----
pkg.env$nn_hparameter_nodes_grid <- function(hparameters, cv = FALSE){
"
@@ -2908,7 +2840,6 @@ pkg.env$evaluate_lkh_nn <-function(X_train,
mutate(efron_c=(1:length(DP_rev_i)-1)/length(DP_rev_i))%>% as.data.frame()


# if(hazard_model %in% c("COX","LTRCtrees")){ds_train_m <- X_train}
# if(hazard_model == "XGB"){


Expand Down Expand Up @@ -2939,11 +2870,6 @@ pkg.env$evaluate_lkh_nn <-function(X_train,



# if(hazard_model == "LTRCtrees"){
# preds_tr <- predict(model$cox,ds_train_m)
# preds_tr <- preds_tr - preds_tr[1]
# }


train_lkh=cox_evaluation_metrix(dtrain=ds_train_m,
preds=as.vector(preds))
@@ -2979,7 +2905,7 @@ pkg.env$evaluate_lkh_xgb <-function(X_train,
mutate(efron_c=(1:length(DP_rev_i)-1)/length(DP_rev_i))%>% as.data.frame()


# if(hazard_model %in% c("COX","LTRCtrees")){ds_train_m <- X_train}

# if(hazard_model == "XGB"){
ds_train_m <- xgboost::xgb.DMatrix( as.matrix.data.frame(tmp_train %>% select(colnames(X_train))),
label=tmp_train$I)
@@ -3014,78 +2940,7 @@ pkg.env$evaluate_lkh_xgb <-function(X_train,
preds_tr <- preds_tr - preds_tr[1]
# }

# if(hazard_model == "LTRCtrees"){
# preds_tr <- predict(model$cox,ds_train_m)
# preds_tr <- preds_tr - preds_tr[1]
# }


train_lkh=cox_evaluation_metrix(dtrain=ds_train_m,
preds=preds_tr)


return(train_lkh)


}


pkg.env$evaluate_lkh_LTRCtrees <-function(X_train,
Y_train,
model){

xy_tr=cbind(X_train,Y_train)


tmp_tr=xy_tr %>%
arrange(DP_rev_i) %>%
as.data.frame()

# tmp_tr[,'id'] = seq(1,dim(tmp_tr)[1])
# tmp_tst[,'id'] = seq(1,dim(tmp_tst)[1])

tmp_train <- tmp_tr %>%
arrange(DP_rev_i) %>%
group_by(DP_rev_i) %>%
mutate(efron_c=(1:length(DP_rev_i)-1)/length(DP_rev_i))%>% as.data.frame()


ds_train_m <- X_train
# if(hazard_model == "XGB"){
# ds_train_m <- xgboost::xgb.DMatrix( as.matrix.data.frame(tmp_train %>% select(colnames(X_train))),
# label=tmp_train$I)}

attr(ds_train_m, 'truncation') <- tmp_train$TR_i
attr(ds_train_m, 'claim_arrival') <- tmp_train$DP_rev_i


attr(ds_train_m, 'risk_sets') <- risks_in_the_tie(starts_i=tmp_train$TR_i,
stops_i=tmp_train$DP_rev_i,
stops = unique(tmp_train$DP_rev_i))

attr(ds_train_m, 'event_sets') <- events_in_the_tie(starts_i=tmp_train$TR_i,
stops_i=tmp_train$DP_rev_i,
stops = unique(tmp_train$DP_rev_i))

attr(ds_train_m, 'efron_c') <- tmp_train$efron_c

attr(ds_train_m, 'tieid') <- unname(table(tmp_train$DP_rev_i))

attr(ds_train_m, 'groups') <- rep( as.integer(names(table(tmp_train$end_time))),
attr(ds_train_m, 'tieid'))


# if(hazard_model == "COX"){
# preds_tr <- predict(model$cox,ds_train_m)
# }
#
# if(hazard_model == "XGB"){
# preds_tr <- predict(model,ds_train_m)
# preds_tr <- preds_tr - preds_tr[1]
# }

preds_tr <- predict(model$cox,ds_train_m)
preds_tr <- preds_tr - preds_tr[1]

train_lkh=cox_evaluation_metrix(dtrain=ds_train_m,
preds=preds_tr)
2 changes: 1 addition & 1 deletion man/pkg.env.Rd
