diff --git a/R/brier_score.R b/R/brier_score.R index c3b319e34..045d5c147 100644 --- a/R/brier_score.R +++ b/R/brier_score.R @@ -1,4 +1,5 @@ +#' Test #' Re-used documentation for Brier Score components #' diff --git a/design/examples/mCRCpub/00- Load.R b/design/examples/mCRCpub/00- Load.R new file mode 100644 index 000000000..a20142f4e --- /dev/null +++ b/design/examples/mCRCpub/00- Load.R @@ -0,0 +1,35 @@ +#' #################################################### +#' +#' 2stgTGIOS +#' Loading the required R libraries +#' +#' Q1-2022 +#' Francois Mercier +#' +#' #################################################### + + +#' if (!require("remotes")) { +#' install.packages("remotes") +#' } +#' remotes::install_github("genentech/jmpost") +#' install.packages("ghibli") + +#' General purpose +#' ================= +library(tidyverse) +library(tidyselect) +library(ghibli) + +#' Data import/export +#' ================= +library(haven) + +#' Survival +#' ================= +library(survival) +library(survminer) + +#' Joint models +#' ================= +library(jmpost) diff --git a/design/examples/mCRCpub/01- ImportHORIZON.R b/design/examples/mCRCpub/01- ImportHORIZON.R new file mode 100644 index 000000000..0fe7af7f4 --- /dev/null +++ b/design/examples/mCRCpub/01- ImportHORIZON.R @@ -0,0 +1,101 @@ +#' #################################################### +#' +#' Data preparation +#' HORIZONIII study (from PDS) +#' +#' Q3-2024 +#' Francois Mercier +#' +#' #################################################### + + +#' sessionInfo() + +#' ---------------------------------- +#' Structure analysis data set: +#' ---------------------------------- +#' For time to event sub-model (event.df): +#' *************************** +#' SUBJID (chr) +#' STUDY (chr) +#' ATRT (chr) +#' +#' EVENTYR (num) - Time to event (in years) +#' EVENTFL (0/1) - Flag =1 if the ind died, 0 if censored +#' +#' For longi (TGI) sub-model (biom.df): +#' *************************** +#' SUBJID (chr) +#' STUDY (chr) +#' 
ATRT (chr) +#' +#' BIOMVAL (num) - Biomarker (here, SLD in mm) value +#' BIOMYR (num) - Biomarker measurement time (in years) +#' ---------------------------------- + + +#' =============================================== +#' Import and Select +#' =============================================================== + +#' Event data frame +#' ----------------- +subj<-haven::read_sas("./design/examples/mCRCpub/data/HORIZONIII/rdpsubj.sas7bdat") +rcist<-haven::read_sas("./design/examples/mCRCpub/data/HORIZONIII/rdprcist.sas7bdat") + +#' Only keep patients in PerProtocol set (PP_SET==1) +#' and exposed to BEV for at least 3 weeks (i.e. at least 1st TA visit) +subj0<-subj |> + filter(PP_SET==1, !is.na(BEV_SDY), !is.na(BEV_EDY), BEV_EDY>3*7) |> + mutate(SUBJID=as.character(RANDCODE)) |> + select(SUBJID, LDH1_5) +#' length(unique(subj0$SUBJID)) +#' 645 + +event.df0<-rcist |> + mutate(SUBJID=as.character(RANDCODE)) |> + filter(SUBJID %in% subj0$SUBJID) |> + group_by(SUBJID) %>% slice(1) %>% ungroup() |> + mutate(EVENTYR=OSTIM/365.25, EVENTFL=ifelse(DEATFLAG==1, 0, 1), + ATRT="FOLFOX alone", STUDY="2") |> + select(SUBJID, STUDY, ATRT, EVENTYR, EVENTFL) + +UID.event0<-unique(event.df0$SUBJID) +#' length(UID.event0) +#' 645 + +#' Biomarker (SLD) data frame +#' ----------------- +#' - remove rows where SLD is NA +#' - remove rows where patients have PBLCNT=NA i.e. 
remove patients without any post-baseline TA +biom.df0<-rcist |> + filter(!is.na(PBLCNT), !is.na(STLDI)) |> + mutate(SUBJID=as.character(RANDCODE), STUDY="2", ATRT="FOLFOX alone", + BIOMVAL=ifelse(STLDI==0, 2.5, STLDI*10), + BIOMYR=ORDYTRT/365.25) |> + select(SUBJID, STUDY, ATRT, BIOMVAL, BIOMYR) + +UID.biom0<-unique(biom.df0$SUBJID) +#' length(UID.biom0) +#' 660 + +#' Retain matching patients +#' ----------------- + +retainID<-intersect(UID.event0, UID.biom0) +#' 640 + +event.df<-event.df0 |> filter(SUBJID %in% retainID) +#' length(unique(event.df$SUBJID)) +#' 640 +#' saveRDS(event.df, file="./design/examples/mCRCpub/data/HORIZONIII/HorizOSads.rds") + + +biom.df<-biom.df0 |> filter(SUBJID %in% retainID) +#' length(unique(biom.df$SUBJID)) +#' 640 +#' saveRDS(biom.df, file="./design/examples/mCRCpub/data/HORIZONIII/HorizTGIads.rds") + + + + diff --git a/design/examples/mCRCpub/01- ImportPRIME.R b/design/examples/mCRCpub/01- ImportPRIME.R new file mode 100644 index 000000000..74bcb3529 --- /dev/null +++ b/design/examples/mCRCpub/01- ImportPRIME.R @@ -0,0 +1,96 @@ +#' #################################################### +#' +#' Data preparation +#' PRIME study (from PDS) +#' +#' Q3-2024 +#' Francois Mercier +#' +#' #################################################### + + +#' sessionInfo() + +#' ---------------------------------- +#' Structure analysis data set: +#' ---------------------------------- +#' For time to event sub-model (event.df): +#' *************************** +#' SUBJID (chr) +#' STUDY (chr) +#' ATRT (chr) +#' +#' EVENTYR (num) - Time to event (in years) +#' EVENTFL (0/1) - Flag =1 if the ind died, 0 if censored +#' +#' For longi (TGI) sub-model (biom.df): +#' *************************** +#' SUBJID (chr) +#' STUDY (chr) +#' ATRT (chr) +#' +#' BIOMVAL (num) - Biomarker (here, SLD in mm) value +#' BIOMYR (num) - Biomarker measurement time (in years) +#' ---------------------------------- + + +#' =============================================== +#' 
Import and Select +#' =============================================================== + +#' Event data frame +#' ----------------- +kras<-haven::read_sas("./design/examples/mCRCpub/data/PRIME/biomark_pds2019.sas7bdat") +kras0<-kras |> select("SUBJID", "BMMTR1") + +adsl<-haven::read_sas("./design/examples/mCRCpub/data/PRIME/adsl_pds2019.sas7bdat") +event.df0<-adsl |> + left_join(kras0, by="SUBJID") |> + filter(BMMTR1=="Wild-type") |> + mutate(STUDY="1", EVENTYR=DTHDY/365.25, EVENTFL=DTH) |> + select("SUBJID", "STUDY", "ATRT", "EVENTYR", "EVENTFL") + +UID.event0<-unique(event.df0$SUBJID) +#' length(UID.event0) +#' 514 + + +#' Biomarker (SLD) data frame +#' ----------------- +adtr<-haven::read_sas("./design/examples/mCRCpub/data/PRIME/adls_pds2019.sas7bdat") +biom.df0<-adtr |> + filter(LSCAT=="Target lesion", !is.na(LSSLD)) |> + mutate(STUDY="1", BIOMYR=VISITDY/365.25, BIOMVAL=LSSLD) |> + group_by(SUBJID, VISITDY) |> slice(1) |> ungroup() |> + select("SUBJID", "BIOMYR", "BIOMVAL") + +UID.biom0<-unique(biom.df0$SUBJID) +#' length(UID.biom0) +#' 488 + + +#' Retain matching patients +#' ----------------- +retainID<-intersect(UID.event0, UID.biom0) +#' 263 + +event.df<-event.df0 |> filter(SUBJID %in% retainID) +#' length(unique(event.df$SUBJID)) +#' 263 +#' saveRDS(event.df, file="./design/examples/mCRCpub/data/PRIME/PRIMEOSads.rds") + +desn0<-event.df0 |> + filter(SUBJID %in% retainID) |> + select(SUBJID, STUDY, ATRT) + +biom.df<-biom.df0 |> + filter(SUBJID %in% retainID) |> + left_join(desn0, by="SUBJID") +#' length(unique(biom.df$SUBJID)) +#' 263 +#' saveRDS(biom.df, file="./design/examples/mCRCpub/data/PRIME/PRIMETGIads.rds") + + + + + diff --git a/design/examples/mCRCpub/02- DescHORIZON.R b/design/examples/mCRCpub/02- DescHORIZON.R new file mode 100644 index 000000000..b47624210 --- /dev/null +++ b/design/examples/mCRCpub/02- DescHORIZON.R @@ -0,0 +1,65 @@ +#' #################################################### +#' +#' Descriptive plots +#' HORIZONIII study (from 
PDS) +#' +#' Q3-2024 +#' Francois Mercier +#' +#' #################################################### + + +#' sessionInfo() + +#' =============================================== +#' Visualize SLD +#' =============================================== + +#' biom.df<-readRDS("./design/examples/mCRCpub/data/HORIZONIII/HorizTGIads.rds") +#' summary(biom.df) + +ybreaks<-c(3, 30, 100, 300) +g0<-ggplot(biom.df, aes(x=BIOMYR, y=BIOMVAL))+ + geom_line(aes(group=SUBJID), colour="grey", alpha=0.2)+ + geom_point(colour="orange4", alpha=0.6, size=0.9)+ + scale_x_continuous("Year", breaks=0.5*(0:5))+ + scale_y_continuous("SLD (mm)", breaks=ybreaks)+ + theme_minimal()+ + theme(panel.grid.minor=element_blank()) +g0 + +set.seed(130) +retainIDsub<-sample(retainID, size=60) + +g0sub<-biom.df |> filter(SUBJID %in% retainIDsub) |> + ggplot(aes(x=BIOMYR, y=BIOMVAL))+ + geom_line(aes(group=SUBJID), colour="grey", alpha=0.2)+ + geom_point(colour="orange4", alpha=0.6, size=0.9)+ + facet_wrap(~as.factor(SUBJID))+ + scale_x_continuous("Year", breaks=0.5*(0:5))+ + scale_y_continuous("SLD (mm)", breaks=ybreaks)+ + theme_minimal()+ + theme(panel.grid.minor=element_blank()) +g0sub + + +#' =============================================== +#' Visualize OS +#' =============================================== + +#' event.df<-readRDS("./design/examples/mCRCpub/data/HORIZONIII/HorizOSads.rds") +#' summary(event.df) + +cox<-coxph(Surv(EVENTYR, EVENTFL)~1, data=event.df) +summary(cox) + +os.kmest<-survfit(Surv(EVENTYR, EVENTFL)~1, data=event.df) +mycols<-c(rev(ghibli::ghibli_palettes$YesterdayMedium)[c(2,4)]) +g1<-survminer::ggsurvplot(os.kmest, data=event.df, + risk.table=T, break.x.by=0.5, legend.title="", + xlab="Time (year)", ylab="Overall survival", + risk.table.fontsize=4, legend=c(0.8, 0.8), palette = mycols) +g1 + + + diff --git a/design/examples/mCRCpub/02- DescPRIME.R b/design/examples/mCRCpub/02- DescPRIME.R new file mode 100644 index 000000000..5a371524a --- /dev/null +++ 
b/design/examples/mCRCpub/02- DescPRIME.R @@ -0,0 +1,56 @@ +#' #################################################### +#' +#' Descriptive plots +#' PRIME study (from PDS) +#' +#' Q3-2024 +#' Francois Mercier +#' +#' #################################################### + + +#' sessionInfo() + +#' =============================================== +#' Visualize SLD +#' =============================================== + +#' biom.df<-readRDS("./design/examples/mCRCpub/data/PRIME/PRIMETGIads.rds") +#' summary(biom.df) + +#' Display SLD spaghetti +ybreaks<-c(3, 30, 100, 300) +mycols<-c(rev(ghibli::ghibli_palettes$YesterdayMedium)[c(2,4)]) +g0<-ggplot(data=biom.df, aes(x=BIOMYR, y=BIOMVAL))+ + geom_point(colour="grey33", alpha=0.3, size=0.9)+ + geom_line(aes(group=SUBJID, colour=as.factor(ATRT)), alpha=0.6)+ + facet_wrap(~ATRT)+ + scale_x_continuous("Year", breaks=0.5*(0:5))+ + scale_y_continuous("SLD (mm)", breaks=ybreaks)+ + scale_colour_manual(values=mycols, guide="none")+ + theme_minimal()+ + theme(panel.grid.minor=element_blank()) +g0 + +#' =============================================== +#' Visualize OS +#' =============================================== + +#' event.df<-readRDS("./design/examples/mCRCpub/data/PRIME/PRIMEOSads.rds") +#' summary(event.df) + +cox<-coxph(Surv(EVENTYR, EVENTFL)~ATRT, data=event.df) +summary(cox) + +os.kmest<-survfit(Surv(EVENTYR, EVENTFL)~ATRT, data=event.df) + +#' Display OS KM +g1<-survminer::ggsurvplot(os.kmest, + data=event.df, risk.table=T, break.x.by=.5, legend.title="", + xlab="Year", ylab="Overall survival", palette = mycols, + risk.table.fontsize=4, legend=c(0.8, 0.8)) +g1 + + + + diff --git a/design/examples/mCRCpub/03- jmHORIZON.R b/design/examples/mCRCpub/03- jmHORIZON.R new file mode 100644 index 000000000..28e789576 --- /dev/null +++ b/design/examples/mCRCpub/03- jmHORIZON.R @@ -0,0 +1,139 @@ +#' #################################################### +#' +#' Joint models +#' HORIZONIII study (from PDS) +#' +#' Q3-2024 +#' Francois 
Mercier +#' +#' #################################################### + + +#' sessionInfo() + + +#' biom.df<-readRDS("./design/examples/mCRCpub/data/HORIZONIII/HorizTGIads.rds") +#' summary(biom.df) + +#' event.df<-readRDS("./design/examples/mCRCpub/data/HORIZONIII/HorizOSads.rds") +#' summary(event.df) + + +#' =============================================== +#' Model SLD +#' =============================================== + +###### +#' !!!!!!!!!!! +###### +event.subset.for.test<-event.df |> filter(SUBJID %in% retainIDsub) +biom.subset.for.test<-biom.df |> filter(SUBJID %in% retainIDsub) +###### +#' !!!!!!!!!!! +###### + +tgi.dat<-DataJoint( + subject = DataSubject(data=event.subset.for.test, subject="SUBJID", arm="ATRT", study="STUDY"), + longitudinal = DataLongitudinal(data=biom.subset.for.test, threshold=3, formula= BIOMVAL~BIOMYR) + ) + +tgi.in<-JointModel(longitudinal=LongitudinalSteinFojo( + mu_bsld=prior_lognormal(log(70), .2), + mu_ks=prior_lognormal(log(0.01), .3), + mu_kg=prior_lognormal(log(0.01), .3), + omega_bsld=prior_lognormal(log(0.5), 0.3), + omega_ks=prior_lognormal(log(0.7), 0.3), + omega_kg=prior_lognormal(log(0.7), 0.3), + sigma=prior_lognormal(log(0.2), 0.1))) + +tgi.samples<-sampleStanModel(tgi.in, data=tgi.dat, iter_warmup=2000, + iter_sampling=1000, chains=3, parallel_chains=3, refresh=500) +tgi.out<-as.CmdStanMCMC(tgi.samples) +print(tgi.out, max_rows=500, digits=5) + +#' Display profiles OBS vs IPRED for 10 random individuals +selected_subjects<-sample(event.subset.for.test$SUBJID, 10) +longquant_obs<-LongitudinalQuantities(tgi.samples, grid=GridObserved(subjects=selected_subjects)) +g2<-autoplot(longquant_obs)+ + theme_minimal()+ + theme(panel.grid.minor=element_blank()) +g2 +summary(longquant_obs) + +#' mu_BSLD: hist(rlnorm(1000, log(70), .2)) +#' mu_ks: hist(rlnorm(1000, log(0.01), .3)) +#' om_BSLD: hist(rlnorm(1000, log(0.5), 0.3)) +#' om_ks: hist(rlnorm(1000, log(0.7), 0.3)) +#' om_sigma: hist(rlnorm(1000, log(0.2), 0.1)) + + +#' 
=============================================== +#' Model OS +#' =============================================== + +surv.dat<-DataJoint( + subject = DataSubject(data=event.df, subject="SUBJID", arm="ATRT", study="STUDY"), + survival = DataSurvival(data=event.df, formula=Surv(EVENTYR, EVENTFL)~1) +) + +surv.in<-JointModel(survival=SurvivalWeibullPH()) + +surv.samples<-sampleStanModel(surv.in, data=surv.dat, iter_warmup=2000, + iter_sampling=1000, chains=3, parallel_chains=3, refresh=500) +surv.out<-as.CmdStanMCMC(surv.samples) +print(surv.out, max_rows=500, digits=5) + +#' Display PRED vs OBS surv curves +expected.surv<-SurvivalQuantities(surv.samples, type="surv", + grid=GridGrouped(times=seq(from=0, to=3, by=0.1), + groups=split(event.df$SUBJID, event.df$ATRT))) + +mycols<-c(rev(ghibli::ghibli_palettes$YesterdayMedium)[c(2,4)]) +g3<-autoplot(expected.surv, add_km=TRUE, add_wrap=FALSE)+ + scale_fill_manual(values=mycols)+ + scale_colour_manual(values=mycols)+ + theme_minimal()+ + theme(panel.grid.minor=element_blank()) +g3 + + +#' =============================================== +#' Model JM +#' =============================================== + +jm.dat<-DataJoint( + subject = DataSubject(data=event.df, subject="SUBJID", arm="ATRT", study="STUDY"), + longitudinal = DataLongitudinal(data=biom.df, threshold=2, formula= BIOMVAL~BIOMYR), + survival = DataSurvival(data=event.df, formula=Surv(EVENTYR, EVENTFL)~1) +) + +jm.in<-JointModel( + longitudinal=LongitudinalSteinFojo( + mu_bsld=prior_lognormal(log(70), .2), + mu_ks=prior_lognormal(log(0.01), .3), + mu_kg=prior_lognormal(log(0.01), .3), + omega_bsld=prior_lognormal(log(0.5), 0.3), + omega_ks=prior_lognormal(log(0.7), 0.3), + omega_kg=prior_lognormal(log(0.7), 0.3), + sigma=prior_lognormal(log(0.2), 0.1)), + survival=SurvivalWeibullPH(), + link=Link(linkTTG(prior_normal(0.01, 3))) +) + +jm.samples<-sampleStanModel(jm.in, data=jm.dat, + iter_sampling=1000, iter_warmup=2000, chains=3, parallel_chains=3) + 
+jm.out<-as.CmdStanMCMC(jm.samples) +print(jm.out, max_rows=500, digits=5) + +#' Display PRED vs OBS surv curves +expected.surv<-SurvivalQuantities(jm.samples, type="surv", + grid=GridGrouped(times=seq(from=0, to=3, by=0.1), + groups=split(event.df$SUBJID, event.df$ATRT))) + +g4<-autoplot(expected.surv, add_km=T, add_wrap=F)+ + scale_fill_manual(values=mycols)+ + scale_colour_manual(values=mycols)+ + theme_minimal()+ + theme(panel.grid.minor=element_blank()) +g4 diff --git a/design/examples/mCRCpub/03- jmPRIME.R b/design/examples/mCRCpub/03- jmPRIME.R new file mode 100644 index 000000000..60e512a64 --- /dev/null +++ b/design/examples/mCRCpub/03- jmPRIME.R @@ -0,0 +1,128 @@ +#' #################################################### +#' +#' Joint models +#' PRIME study (from PDS) +#' +#' Q3-2024 +#' Francois Mercier +#' +#' #################################################### + + +#' sessionInfo() + + +#' biom.df<-readRDS("./design/examples/mCRCpub/data/PRIME/PRIMETGIads.rds") +#' summary(biom.df) + +#' event.df<-readRDS("./design/examples/mCRCpub/data/PRIME/PRIMEOSads.rds") +#' summary(event.df) + + +#' =============================================== +#' Model SLD +#' =============================================== + +tgi.dat<-DataJoint( + subject = DataSubject(data=event.df, subject="SUBJID", arm="ATRT", study="STUDY"), + longi = DataLongitudinal(data=biom.df, threshold=3, formula= BIOMVAL~BIOMYR) + ) + +tgi.in<-JointModel(longitudinal=LongitudinalSteinFojo( + mu_bsld = prior_lognormal(log(70), .1), + mu_ks = prior_lognormal(log(1.8), .1), + mu_kg = prior_lognormal(log(0.15), .1), + omega_bsld = prior_lognormal(log(0.1), .1), + omega_ks = prior_lognormal(log(0.1), .5), + omega_kg = prior_lognormal(log(0.1), .5), + sigma = prior_lognormal(log(0.18), .5), +)) + +tgi.samples<-sampleStanModel(tgi.in, data=tgi.dat, + iter_sampling = 1000, + iter_warmup = 2000, + chains = 3, parallel_chains = 3) +tgi.out<-as.CmdStanMCMC(tgi.samples) +print(tgi.out, max_rows=500, digits=5) 
+ +#' Display profiles OBS vs IPRED for the first 10 individuals +selected_subjects<-head(event.df$SUBJID, 10) +longquant_obs<-LongitudinalQuantities(tgi.samples, grid=GridObserved(subjects=selected_subjects)) +autoplot(longquant_obs) + + +#' =============================================== +#' Model OS +#' =============================================== + +surv.dat<-DataJoint( + subject = DataSubject(data=event.df, subject="SUBJID", arm="ATRT", study="STUDY"), + survival = DataSurvival(data=event.df, formula=Surv(EVENTYR, EVENTFL)~ATRT) +) + +surv.in<-JointModel(survival=SurvivalWeibullPH()) + +surv.samples<-sampleStanModel(surv.in, data=surv.dat, + iter_sampling = 1000, + iter_warmup = 2000, + chains = 3, parallel_chains = 3) +surv.out<-as.CmdStanMCMC(surv.samples) +print(surv.out, max_rows=500, digits=5) + +#' Display PRED vs OBS surv curves +expected.surv<-SurvivalQuantities(surv.samples, type="surv", + grid=GridGrouped(times=seq(from=0, to=4, by=0.1), + groups=split(event.df$SUBJID, event.df$ATRT))) + +mycols<-c(rev(ghibli::ghibli_palettes$YesterdayMedium)[c(2,4)]) +g3<-autoplot(expected.surv, add_km=T, add_wrap=F)+ + scale_fill_manual(values=mycols)+ + scale_colour_manual(values=mycols)+ + theme_minimal()+ + theme(panel.grid.minor=element_blank()) +g3 + + +#' =============================================== +#' Model JM +#' =============================================== + +jm.dat<-DataJoint( + subject = DataSubject(data=event.df, subject="SUBJID", arm="ATRT", study="STUDY"), + longitudinal = DataLongitudinal(data=biom.df, threshold=3, formula= BIOMVAL~BIOMYR), + survival = DataSurvival(data=event.df, formula=Surv(EVENTYR, EVENTFL)~ATRT) +) + +jm.in<-JointModel( + longitudinal=LongitudinalSteinFojo( + mu_bsld = prior_lognormal(log(70), .1), + mu_ks = prior_lognormal(log(1.8), .1), + mu_kg = prior_lognormal(log(0.15), .1), + omega_bsld = prior_lognormal(log(0.1), .1), + omega_ks = prior_lognormal(log(0.1), .5), + omega_kg = prior_lognormal(log(0.1), .5), + sigma = 
prior_lognormal(log(0.18), .5), + ), + survival = SurvivalWeibullPH(), + link = Link(linkTTG(prior_normal(0.01, 3))) +) + +jm.samples<-sampleStanModel(jm.in, + data = jm.dat, + iter_sampling = 1000, + iter_warmup = 2000, + chains = 3, parallel_chains = 3) + +jm.out<-as.CmdStanMCMC(jm.samples) +print(jm.out, max_rows=500, digits=5) + +#' Display PRED vs OBS surv curves +expected.surv<-SurvivalQuantities(jm.samples, type="surv", + grid=GridGrouped(times=seq(from=0, to=4, by=0.1), + groups=split(event.df$SUBJID, event.df$ATRT))) +autoplot(expected.surv, add_km=T, add_wrap=F)+ + scale_fill_manual(values=mycols)+ + scale_colour_manual(values=mycols)+ + theme_minimal()+ + theme(panel.grid.minor=element_blank()) + diff --git a/design/examples/mCRCpub/data/HORIZONIII/HorizOSads.rds b/design/examples/mCRCpub/data/HORIZONIII/HorizOSads.rds new file mode 100644 index 000000000..6831bbd64 Binary files /dev/null and b/design/examples/mCRCpub/data/HORIZONIII/HorizOSads.rds differ diff --git a/design/examples/mCRCpub/data/HORIZONIII/HorizTGIads.rds b/design/examples/mCRCpub/data/HORIZONIII/HorizTGIads.rds new file mode 100644 index 000000000..23322c066 Binary files /dev/null and b/design/examples/mCRCpub/data/HORIZONIII/HorizTGIads.rds differ diff --git a/design/examples/mCRCpub/data/HORIZONIII/rdprcist.sas7bdat b/design/examples/mCRCpub/data/HORIZONIII/rdprcist.sas7bdat new file mode 100644 index 000000000..ad8eca6e6 Binary files /dev/null and b/design/examples/mCRCpub/data/HORIZONIII/rdprcist.sas7bdat differ diff --git a/design/examples/mCRCpub/data/HORIZONIII/rdpsubj.sas7bdat b/design/examples/mCRCpub/data/HORIZONIII/rdpsubj.sas7bdat new file mode 100644 index 000000000..f3a5fd5ef Binary files /dev/null and b/design/examples/mCRCpub/data/HORIZONIII/rdpsubj.sas7bdat differ diff --git a/design/examples/mCRCpub/data/PRIME/PRIMEOSads.rds b/design/examples/mCRCpub/data/PRIME/PRIMEOSads.rds new file mode 100644 index 000000000..ba2624d49 Binary files /dev/null and 
b/design/examples/mCRCpub/data/PRIME/PRIMEOSads.rds differ diff --git a/design/examples/mCRCpub/data/PRIME/PRIMETGIads.rds b/design/examples/mCRCpub/data/PRIME/PRIMETGIads.rds new file mode 100644 index 000000000..3d6ee1a33 Binary files /dev/null and b/design/examples/mCRCpub/data/PRIME/PRIMETGIads.rds differ diff --git a/design/examples/mCRCpub/data/PRIME/adls_pds2019.sas7bdat b/design/examples/mCRCpub/data/PRIME/adls_pds2019.sas7bdat new file mode 100644 index 000000000..d9fbee845 Binary files /dev/null and b/design/examples/mCRCpub/data/PRIME/adls_pds2019.sas7bdat differ diff --git a/design/examples/mCRCpub/data/PRIME/adsl_pds2019.sas7bdat b/design/examples/mCRCpub/data/PRIME/adsl_pds2019.sas7bdat new file mode 100644 index 000000000..dbf00bb3f Binary files /dev/null and b/design/examples/mCRCpub/data/PRIME/adsl_pds2019.sas7bdat differ diff --git a/design/examples/mCRCpub/data/PRIME/biomark_pds2019.sas7bdat b/design/examples/mCRCpub/data/PRIME/biomark_pds2019.sas7bdat new file mode 100644 index 000000000..40ed60ee4 Binary files /dev/null and b/design/examples/mCRCpub/data/PRIME/biomark_pds2019.sas7bdat differ diff --git a/design/examples/mCRCpub/trash/expon-ph-benchmark.R b/design/examples/mCRCpub/trash/expon-ph-benchmark.R new file mode 100644 index 000000000..b35a92159 --- /dev/null +++ b/design/examples/mCRCpub/trash/expon-ph-benchmark.R @@ -0,0 +1,156 @@ +#' ############################################################################# +#' +#' Fit Weibull PH model to ‘pb’ data from the {flexsurv} package, +#' using various tools/packages: +#' I. flexsurv +#' II. survstan +#' III. jmpost +#' +#' Initiated on: 2024-03-28 +#' Author: F. 
Mercier +#' +#' ############################################################################# + + + +#' =========================================================== +#' +#' INSTALL AND LOAD NECESSARY LIBRARIES +#' +#' =========================================================== + +#' General +#' ========================== +library(Rcpp) +library(cli) +library(here) +library(tidyverse) + +#' Survival +#' ========================== +#' install.packages("devtools") +#' install.packages("cmdstanr", repos = c("https://mc-stan.org/r-packages/", getOption("repos"))) +#' devtools::install("~/jmpost") +library(cmdstanr) +library(flexsurv) +library(survstan) +library(jmpost) + + +#' =========================================================== +#' +#' DATA PREP +#' +#' =========================================================== + +#' From the flexsurv package +head(bc, 2) + +#' recyrs represents the time (in years) of death or cancer recurrence when +#' censrec is 1, or (right-)censoring when censrec is 0. +#' The covariate group is a factor representing a prognostic score, with 3 levels: +#' "Good" (the reference), "Medium" and "Poor". +#' In flexsurv, the baseline Weibull model is implemented as: +#' S(t) = exp(-(t/mu)^alpha), with alpha=shape param, mu=scale param. 
+#' group is a linear effect on log(mu) + + +#' =========================================================== +#' +#' Weibull PH model fits +#' +#' =========================================================== + +#' Flexsurv +#' ========================== +# flexsurv.exp<-flexsurvreg(Surv(recyrs, censrec)~group, data=bc, dist="exp") +# flexsurv.exp +# plot(flexsurv.exp) +# flexsurv.exp$AIC +# flexsurv.exp$res + +flexsurv.weiph<-flexsurvreg(Surv(recyrs, censrec)~group, data=bc, dist="weibullph") +flexsurv.weiph + +#' Survstan +#' ========================== +# survstan.exp<-survstan::phreg(Surv(recyrs, censrec)~group, data=bc, dist="exponential") +# sumsurvstan<-summary(survstan.exp) +# sumsurvstan$AIC +# sumsurvstan$coefficients +# sumsurvstan$tbl + +survstan.weiph<-survstan::phreg(Surv(recyrs, censrec)~group, data=bc, dist="weibull") +summary(survstan.weiph) + + +#' jmpost +#' ========================== +# jmpost.survonly.exp<-JointModel(survival=SurvivalExponential(lambda=prior_lognormal(log(0.06), 1))) + +# bc1<-bc %>% mutate(ID=as.character(1:n()), study=1) + +# jdat<-DataJoint( +# subject=DataSubject(data=bc1, subject="ID", arm="group", study="study"), +# survival=DataSurvival(data=bc1, formula=Surv(recyrs, censrec)~group) +# ) + +# mp<-sampleStanModel(jmpost.survonly.exp, data=jdat, iter_warmup=4000, +# iter_sampling=1000, chains=4, refresh=0) + +# vars<-c("sm_exp_lambda", "beta_os_cov") +# mp@results$summary(vars) + +jmpost.weiph<-JointModel(survival=SurvivalWeibullPH()) + +bc1<-bc %>% mutate(ID=as.character(1:n()), study=1) + +jdat<-DataJoint( + subject=DataSubject(data=bc1, subject="ID", arm="group", study="study"), + survival=DataSurvival(data=bc1, formula=Surv(recyrs, censrec)~group) +) + +mp<-sampleStanModel(jmpost.weiph, data=jdat, iter_warmup=4000, + iter_sampling=1000, chains=4, refresh=0) +mp@results + +# vars<-c("sm_exp_lambda", "beta_os_cov") +# mp@results$summary(vars) + + + +#' =========================================================== +#' +#' 
Pooling results together +#' +#' =========================================================== + +flexsurv.prep<-as_tibble(signif(flexsurv.exp$res, 3)) |> + rename(Estimate=est, SE=se, P025=`L95%`, P975=`U95%`) |> + relocate(SE, .after=Estimate) |> + mutate(meth="flexsurv", rowN=1:3, libel=c("lambda", "beta_group_Medium", "beta_group_Poor"), .before=1) + +survstan.prep1<-as_tibble(signif(sumsurvstan$tbl, 3)) |> + rename(Estimate=estimate, P025=`2.5%`, P975=`97.5%`, SE=se) +survstan.prep2<-as_tibble(signif(sumsurvstan$coefficients[,1:2], 3)) |> + rename(SE=StdErr) |> + mutate(P025=999, P975=999, .after=1) +survstan.prep<-rbind(survstan.prep1, survstan.prep2) |> + mutate(meth="survstan", rowN=1:3, libel=c("lambda", "beta_group_Medium", "beta_group_Poor"), .before=1) + +jmpost.prep<-as_tibble(mp@results$summary(vars)) |> + rename(Estimate=mean, SE=sd, P025=q5, P975=q95) |> + select(Estimate, SE, P025, P975) |> + mutate(meth="jmpost", rowN=1:3, libel=c("lambda", "beta_group_Medium", "beta_group_Poor"), .before=1) + + +altogether<-rbind(flexsurv.prep, survstan.prep, jmpost.prep) + + + + + + + + + diff --git a/design/examples/mCRCpub/trash/expon-ph-outputs.R b/design/examples/mCRCpub/trash/expon-ph-outputs.R new file mode 100644 index 000000000..4119df177 --- /dev/null +++ b/design/examples/mCRCpub/trash/expon-ph-outputs.R @@ -0,0 +1,296 @@ +#' ############################################################################# +#' +#' For a time to event model of ‘pb’ data from the {flexsurv} package, +#' assuming an exponential baseline hazard, the goal here is to +#' illustrate the way to assess: +#' (i) Convergence +#' (ii) GoF +#' +#' Solutions proposed by {survstan} will be used as benchmark +#' +#' Initiated on: 2024-02-14 +#' Author: F. 
Mercier +#' +#' ############################################################################# + + +#' =========================================================== +#' +#' INSTALL AND LOAD NECESSARY LIBRARIES +#' +#' =========================================================== + +library(survstan) +library(here) +install.packages("cmdstanr", + repos = c("https://stan-dev.r-universe.dev/", getOption("repos"))) +if (!require("remotes")) { + install.packages("remotes") +} +remotes::install_github("genentech/jmpost") + +library(tidyverse) +library(flexsurv) +library(cmdstanr) +library(posterior) +library(bayesplot) +library(tidybayes) +library(ggdist) +library(bayestestR) +library(loo) +library(jmpost) + +library(cmdstanr) +check_cmdstan_toolchain() +cmdstan_path() +file <- file.path(cmdstan_path(), "examples", "bernoulli", "bernoulli.stan") +mod <- cmdstan_model(file) +mod$print() +mod$exe_file() +data_list <- list(N = 10, y = c(0,1,0,0,0,0,0,0,0,1)) + +fit <- mod$sample( + data = data_list, + seed = 123, + chains = 4, + parallel_chains = 4, + refresh = 500 # print update every 500 iters +) +fit$summary() + + +#' =========================================================== +#' +#' Data (from the flexsurv package) +#' +#' =========================================================== + +head(bc, 2) +glimpse(bc) +rbind(head(bc), tail(bc)) + +#' Kaplan-Meier plot and Risk table +p001 <- ggsurvfit::survfit2(Surv(recyrs, censrec) ~ group, data = bc) |> + ggsurvfit(linewidth = 1) + + add_confidence_interval() + + add_risktable() + + add_quantile(y_value = 0.5, color = "gray50", linewidth = 0.75) + + scale_ggsurvfit() +p001 + +#' Checking independent censoring +p002 <- ggsurvfit::survfit2(Surv(recyrs, 1-censrec) ~ group, data = bc) |> + ggsurvfit(linewidth = 1) + + add_confidence_interval() + + scale_ggsurvfit() +p002 + +#' Kernel density estimate for the hazard +futime=bc$recyrs; fustat=bc$censrec +fit1 <- muhaz::muhaz(futime, fustat, bw.method="g") +plot(fit1) +summary(fit1) + + 
+#' =========================================================== +#' +#' jmpost exponential PH implementation +#' +#' =========================================================== + + +jmpost.survonly.exp<-JointModel(survival=SurvivalExponential(lambda=prior_lognormal(log(0.06), 1))) + +bc1<-bc %>% mutate(ID=as.character(1:n()), study=1) + +jdat<-DataJoint( + subject=DataSubject(data=bc1, subject="ID", arm="group", study="study"), + survival=DataSurvival(data=bc1, formula=Surv(recyrs, censrec)~group) +) + +mp<-sampleStanModel(jmpost.survonly.exp, data=jdat, iter_warmup=4000, + iter_sampling=1000, chains=4, refresh=0) + +mp@results +## +vars<-c("sm_exp_lambda", "beta_os_cov") +# mp@results$summary(vars) + + +#' =========================================================== +#' +#' I. POSTERIOR DISTRIBUTION SUMMARY +#' +#' =========================================================== + + +#' Convert the samples into a data frame +#' ------------------------------------------------------------\ +my_fitall_df <- posterior::as_draws_df(mp@results) +my_pars<-c("sm_exp_lambda", "beta_os_cov[1]", "beta_os_cov[2]") +my_fitpop_df<-posterior::subset_draws(my_fitall_df, variable=my_pars) + +#' Summarize the posterior samples +#' ------------------------------------------------------------\ +#' Using {posterior} +posterior::summarise_draws(my_fitpop_df) + + +#' Quantiles or Highest density interval +#' ------------------------------------------------------------\ +#' For each parameter, it is possible to extract various quantities: +#' mean_hdci(), median_hdci(), etc ... from [median|mean|mode]_[qi|hdi] +#' see: ggdist:: ?mean_qi +my_fitpop_df %>% + tidybayes::spread_rvars(sm_exp_lambda) %>% + ggdist::mean_hdci() +my_fitpop_df %>% + tidybayes::spread_rvars(sm_exp_lambda) %>% + ggdist::mean_qi() + +#' Median (and associated uncertainty expressed as lower and upper bounds for +#' pmf containing 50%, 89%, 95% of the distribution density +#' = e.g. 
89% credibility interval for the median) +#' for sm_exp_lambda across all chains +my_fitpop_df %>% + tidybayes::spread_rvars(sm_exp_lambda) %>% + tidybayes::median_qi(.width = c(.50, .89, .95)) + +#' 2.5%, 50% (median) and 97.5% quantiles of sm_exp_lambda across all chains +lambda_samples<-my_fitpop_df %>% + tidybayes::spread_draws(sm_exp_lambda) +lambda_samples$sm_exp_lambda %>% quantile(., probs=c(0.025, 0.5, 0.975)) + + +#' Posterior density +#' using tidybayes see https://mjskay.github.io/tidybayes/articles/tidy-rstanarm.html +#' ------------------------------------------------------------\ + +my_fitpop_df %>% + tidybayes::spread_draws(sm_exp_lambda) %>% + tidybayes::median_qi(., .width = c(.95, .66)) %>% + ggplot(aes(y = NA, x = sm_exp_lambda, xmin = .lower, xmax = .upper)) + + scale_x_continuous("Lambda_0")+ + scale_y_discrete("")+ + tidybayes::geom_pointinterval()+ + labs(title="sm_exp_lambda posterior distributions", + caption="OS analysis / mercief3 / 2024-02-20")+ + theme_minimal() + +#' (Post-warmup) Posterior distributions +#' ------------------------------------------------------------\ +bayesplot::mcmc_areas(my_fitpop_df, pars = my_pars, prob = 0.8)+ + theme_minimal() +bayesplot::mcmc_dens_overlay(my_fitpop_df, pars=my_pars)+ + theme_minimal() + + + +#' =========================================================== +#' +#' II. 
CONVERGENCE +#' +#' =========================================================== + + +#' Checking that Rhat<1.05 +#' ------------------------------------------------------------\ +#' see https://cran.r-project.org/web/packages/bayesplot/vignettes/visual-mcmc-diagnostics.html + +#' Using {bayesplot} +rhats <- bayesplot::rhat(mp@results) +print(rhats) +bayesplot::color_scheme_set("brightblue") # see help("color_scheme_set") +bayesplot::mcmc_rhat(rhats)+ bayesplot::yaxis_text(hjust = 1) + + +#' Checking ESS +#' ------------------------------------------------------------\ +#' Using {bayesplot} +#' see https://cran.r-project.org/web/packages/bayesplot/vignettes/visual-mcmc-diagnostics.html +neff_ratios <- bayesplot::neff_ratio(mp@results) +print(neff_ratios) +bayesplot::color_scheme_set("brightblue") # see help("color_scheme_set") +bayesplot::mcmc_neff(neff_ratios, size = 2)+ bayesplot::yaxis_text(hjust = 1) + + +#' Checking autocorrelation of the exponential rate parameter (sm_exp_lambda) +#' ------------------------------------------------------------\ +#' Using {bayesplot} +#' see https://cran.r-project.org/web/packages/bayesplot/vignettes/visual-mcmc-diagnostics.html +bayesplot::mcmc_acf_bar(my_fitpop_df, pars="sm_exp_lambda", lags=10)+ + theme_minimal() + + +#' Trace plot for each param in my_pars +#' ------------------------------------------------------------\ +bayesplot::mcmc_trace(my_fitpop_df, pars = my_pars)+ + theme_minimal() + + + +#' =========================================================== +#' +#' III. GOODNESS OF FIT +#' +#' =========================================================== + +# Log Likelihood: row-wise sum of the log_lik draws matrix, then averaged over rows +#' =========================================================== +log_lik <- mp@results$draws("log_lik", format = "draws_matrix") |> + apply(1, sum) |> + mean() +log_lik + +# AIC (parameter count taken from my_pars instead of a hard-coded 4) +k <- 2 +-2 * log_lik + k * length(my_pars) + +# BIC (same parameter count, weighted by log of the number of rows in bc1) +(length(my_pars) * log(nrow(bc1))) + (-2 * log_lik) + +# Leave one out CV +mp@results$loo() + + + + + +#' =========================================================== +#' +#' IV. 
PREDICTIONS +#' +#' Overlaying KM and posterior survival curves +#' See example: +#' https://stablemarkets.netlify.app/post/post2/specifying-accelerated-failure-time-models-in-stan/ +#' +#' +#' =========================================================== + +# Summarise the posterior draws per parameter (zparms), then call simsurv() with dist="exponential" and plot the result with survfit2()/ggsurvfit() +# NOTE(review): an earlier comment here referred to `pweibullPH` / hweibullPH / HweibullPH, none of which is called below - confirm it was carried over from a Weibull script +# NOTE(review): `zsurv` is not defined in this section (only `zparms` is) - confirm it is created earlier in the file + +zparms<-my_fitpop_df |> + pivot_longer(cols = 1:3, names_to = "parms", values_to = "value") |> + group_by(parms) |> + tidybayes::median_qi() |> + ungroup() + +jdat<-DataJoint( + subject=DataSubject(data=bc1, subject="ID", arm="group", study="study"), + survival=DataSurvival(data=bc1, formula=Surv(recyrs, censrec)~group) +) + +set.seed(13579) +covs <- data.frame(ID = bc1$ID, trt=bc1$group) +s1 <- simsurv(dist="exponential", + x=covs, betas=c(trt=0), + lambdas = zsurv$lambda[1], maxt = max(bc1$recyrs)) + +survfit2(Surv(eventtime, status) ~ 1, data = s1) |> + ggsurvfit(linewidth = 1) + + add_confidence_interval() + + add_risktable() + + add_quantile(y_value = 0.6, color = "gray50", linewidth = 0.75) + + scale_ggsurvfit() diff --git a/design/examples/mCRCpub/trash/trash.R b/design/examples/mCRCpub/trash/trash.R new file mode 100644 index 000000000..575f2daf4 --- /dev/null +++ b/design/examples/mCRCpub/trash/trash.R @@ -0,0 +1,46 @@ + diff --git a/man/Brier-Score-Shared.Rd b/man/Brier-Score-Shared.Rd index a9c7e6869..ea9eb9539 100644 --- a/man/Brier-Score-Shared.Rd +++ b/man/Brier-Score-Shared.Rd @@ -2,7 +2,8 @@ % Please edit documentation in R/brier_score.R \name{Brier-Score-Shared} \alias{Brier-Score-Shared} -\title{Re-used documentation for Brier Score components} +\title{Test +Re-used documentation for Brier Score components} \arguments{ \item{t}{(\code{numeric})\cr timepoints to calculate the desired quantity at.} @@ -22,6 +23,7 @@ package.} \item{...}{not used.} } \description{ +Test Re-used documentation 
for Brier Score components } \keyword{internal} diff --git a/vignettes/.gitignore b/vignettes/.gitignore index 097b24163..3432c3fd8 100644 --- a/vignettes/.gitignore +++ b/vignettes/.gitignore @@ -1,2 +1,2 @@ *.html -*.R + diff --git a/vignettes/statistical-specification.Rmd b/vignettes/statistical-specification.Rmd index ac75248c1..000f46422 100644 --- a/vignettes/statistical-specification.Rmd +++ b/vignettes/statistical-specification.Rmd @@ -9,8 +9,8 @@ output: link-citations: true vignette: > %\VignetteIndexEntry{Statistical Specifications} - %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} editor_options: chunk_output_type: console --- diff --git a/vignettes/survivalonly-1-statsspecs.Rmd b/vignettes/survivalonly-1-statsspecs.Rmd new file mode 100644 index 000000000..7344dd3b4 --- /dev/null +++ b/vignettes/survivalonly-1-statsspecs.Rmd @@ -0,0 +1,378 @@ +--- +title: "01. Survival models" +subtitle: "Part 1. Statistical specifications" +package: jmpost +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Model Fitting} + %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + chunk_output_type: console + markdown: + wrap: 72 +--- + +```{r, include=FALSE} +knitr::opts_chunk$set(echo = TRUE, collapse = TRUE) +``` + +## Scope of this document + +This document describes how to analyze survival data using parametric +survival models implemented in the `jmpost` R package. It is organized +in 4 sections: + +- Introduction + +- Modeling framework + +- Implementation + +## 1. Introduction + +Survival analysis, also referred to as time-to-event or failure time +analysis, investigates the duration until one or more events of interest +occur and the factors influencing these durations. Comprehensive +introductions to survival analysis are provided by Kalbfleisch and +Prentice (2002), Collett (2003), and Hosmer et al. (2008). 
+ +Three predominant approaches to modeling survival data are commonly +employed: + +1. Hazard Rate Models: These models describe the instantaneous rate of + the event (the hazard) as a function of time, treated as a + continuous variable. This category includes the widely used class of + proportional hazards (PH) regression models. + +2. Event Time Models: These models directly analyze the event time + itself, also treating time as a continuous variable. The accelerated + failure time (AFT) models fall under this category. + +3. Discrete Time Models: These models analyze the occurrence of events + when time is considered as a discrete variable, with the additive + hazard regression (AHR) model being a specific example. + +In the current version of the `jmpost` package, we focus on the first +approach, specifically on PH models within a Bayesian framework. + +Proportional hazards (PH) models are pivotal in survival analysis and +rely on the key assumption that the hazard ratio (HR) comparing any two +levels of a covariate remains constant over time. The Cox PH model is +the most popular among these models for two primary reasons: (a) it does +not require assumptions about the probability distribution of survival +times, and (b) it generally fits data well across various parametric +models. + +In contrast, fully parametric PH models require a distributional +assumption (Kalbfleisch and Prentice 2002; Lawless 2002). When the +appropriate distribution is selected, parametric models can provide more +efficient estimates, reflected in smaller standard errors compared to +the Cox model (Collett 2003). Moreover, using the Cox PH model in joint +modeling of time-to-event and longitudinal data (Wulfsohn and Tsiatis +1997) often leads to underestimation of standard errors of parameter +estimates (Hsieh et al. 2006; Rizopoulos 2012). Consequently, most joint +modeling methods are based on parametric response distributions (Hwang +and Pennell 2014). 
+ +Furthermore, the application of Cox proportional hazards (PH) models in +joint modeling of time-to-event and longitudinal data often results in +the underestimation of the standard errors of parameter estimates +(Wulfsohn and Tsiatis 1997; Hsieh et al. 2006; Rizopoulos 2012). As a +result, many methods for joint modeling are predicated on parametric +response distributions (Hwang and Pennell 2014). + +Bayesian inference offers several advantages: + +- The ability to make probability statements about parameters. + +- Natural handling of hierarchical structures. + +- Robust statistical properties, particularly with small sample sizes. + +- The capacity to easily quantify uncertainty in predicted quantities. + +These properties are highly advantageous for joint models and are +equally beneficial when applied to survival models independently. + +`jmpost` is developed in Stan, a high-level language (itself implemented +in C++) designed for Bayesian modeling and inference. Stan primarily +utilizes a variant of the No-U-Turn sampler (NUTS) (Hoffman & Gelman, +2014) to generate posterior samples based on a user-specified model and +data. + +While Stan is an exceptionally flexible and powerful tool for +statistical modeling, it can be challenging to learn and is not +inherently user-friendly. To address this barrier, we introduce `jmpost` +as an additional layer atop Stan, simplifying the encoding of +proportional hazards (PH) models. This includes both stand-alone PH +models and those integrated within joint models. + +## 2. Modeling framework + +### 2.1. Distribution of time-to-event data + +Let $T$ denote a continuous non-negative random variable representing +survival time. This variable can be specified by its density function +$f(t)$ and cumulative distribution function $F(t)$. The hazard function, +defined as + +$$ +h(t) = \lim_{\Delta \to 0} \frac{P(t < T \leq t + \Delta \mid T > t)}{\Delta} +$$ + +plays a central role in modeling the time to event. 
The hazard function +represents the risk of an event occurring immediately after time $t$, +given that no event has occurred up to time $t$. From this definition, +we can derive that $h(t) = \frac{f(t)}{S(t)}$, where $S(t) = 1 - F(t)$ +is the survival function, representing the probability of no event +occurring until time $t$. + +From the hazard function, we can define the cumulative hazard function +as + +$$ +H(t) = \int_0^t h(\tau) \, d\tau +$$ + +which is related to the survival function by $S(t) = \exp(-H(t))$. The +relationships between $h(t)$, $H(t)$, $f(t)$, and $S(t)$ constitute the +fundamental formulas that represent key concepts in survival modeling. + +In practice, the exact event time may not be observed due to right +censoring, such as when the event occurs after the conclusion of a +clinical trial. Formally, let $T_i^*$ be the true event time and $C_i$ +be the censoring time (e.g., the end of the study). We observe +$T_i = \min(T_i^*, C_i)$. To distinguish censored observations, we use +an event indicator $d_i$, which is 0 for right censoring ($T_i^* > C_i$) +and 1 if the event is observed ($T_i = T_i^*$). Thus, the observed +outcome is defined by the pair $\{T_i, d_i\}$, where $T_i > 0$. + +### 2.2. Parametric model + +Parametric time-to-event models are constructed by specifying the +outcome distribution. For instance, suppose $T$ follows an exponential +distribution: + +$$ T \sim \text{exp}(\lambda) \quad \text{with} \; \lambda > 0, $$ + +with + +$$ f(t) = \lambda \cdot \exp(-\lambda t) \quad \text{and} \quad F(t) = 1 - \exp(-\lambda t). $$ + +This leads to + +$$ S(t) = 1 - F(t) = \exp(-\lambda t). $$ + +Using $h(t) = \frac{f(t)}{S(t)}$, we get $h(t) = \lambda$. The hazard +function for the exponential distribution is constant. + +However, the exponential distribution is often too restrictive. In +principle, any positive continuous distribution (or transformation to +obtain such) can be used for the outcome. 
One of the most commonly used +distributions is the Weibull distribution, a two-parameter model +flexible enough to characterize both monotonic increasing and decreasing +hazard rates over time. Different parameterizations exist for this +distribution. Within `jmpost`, the survival and hazard functions are +implemented using the following parameterization: + +$$ S(t) = \exp(-\lambda t^\gamma) $$ + +$$ h(t) = \lambda \gamma \cdot t^{\gamma-1} $$ + +where $\gamma > 0$ is the shape parameter and $\lambda > 0$ is the scale +parameter. The density function can be derived from +$f(t) = h(t) \cdot S(t)$. + +Additionally, a log-logistic distribution can also be considered. Its +survival and hazard functions are: + +$$ S(t) = \frac{1}{1 + (t/b)^a} $$ + +$$ h(t) = \frac{(a/b) \cdot (t/b)^{a-1}}{1 + (t/b)^a} $$ + +where $a > 0$ is the shape parameter and $b > 0$ is the scale parameter. + +To summarize, the parameters involved in each type of parametric +survival model are recapitulated in [Table 1](#table1). + + Table 1. Parameters associated with each survival +model + +| Model | Parameter | Variable name | Interpretation | Default prior | +|---------------|---------------|---------------|---------------|---------------| +| Exponential | $\lambda$ | `sm_exp_lambda` | Time invariant hazard rate | xxx | +| Weibull | $\gamma$ | `sm_weibull_ph_gamma` | Shape | xxx | +| | $\lambda$ | `sm_weibull_ph_lambda` | Scale | xxx | +| Log-logistic | $a$ | `sm_loglogis_a` | Shape | xxx | +| | $b$ | `sm_loglogis_b` | Scale | xxx | + +### 2.3. Covariate model + +In many cases, additional characteristics of the individuals, such as +age, sex, tumor stage, etc., are known. These characteristics are +referred to as covariates. Let $X_i$ denote the $p \times 1$ vector of +values from the covariate matrix $X$ for individual $i$, and let $\beta$ +be a $p \times 1$ vector of regression coefficients. It is plausible to +assume that $X$ influences the shape, scale, or both parameters of the +model. 
However, a common approach is to assume that $X$ affects only the +scale parameter, denoted $\mu_i$ for individual $i$. The typical formulation for this relationship is: + +$$ +\log(\mu_i) = X_i^T \beta +$$ + +To ensure identifiability, we assume that the design vector $X_i$ does +not include a constant term; that is, no intercept is included in the +above equation. + +Various regression models can be fitted, such as proportional hazard +(PH) models, accelerated failure time (AFT) models, proportional odds +(PO) models, etc. Currently, `jmpost` implements only PH regression +models. PH models are defined as: + +$$ +h_i(t) = h_0(t) \cdot \exp(X_i^T \beta) +$$ + +where $h_0(t)$ is the baseline hazard function. + +The hazard ratio (HR) quantifies the relative increase in the hazard +associated with a unit increase in the relevant covariate, $X_i$, while +holding all other covariates in the model constant. + +### 2.4. Log-likelihood + +We can specify the relevant parameter(s) as $\theta$. In the case of the +exponential distribution, $\theta = \lambda$. For other distributions, +$\theta = (\mu, \alpha)$, where $\mu$ represents the location or scale +parameter, and $\alpha$ describes the shape or variance of the +distribution. The objective of the statistical analysis is the +estimation of $\theta$, which can then be used to obtain estimates for +all relevant quantities (e.g., the survival function). + +Based on the observed outcomes $\{T_i, d_i\}$ for $i = 1, \ldots, n$, +the likelihood function can be written as: + +$$ +L(\theta) = \prod_{i=1}^{n} f(T_i)^{d_i} \cdot S(T_i)^{1 - d_i} \cdot g(d_i) +$$ + +where $g(d_i)$ is the density function of $d_i$. We assume that $C_i$ +and consequently $d_i$ are independent of $T_i$. This type of censoring +is known as non-informative censoring. Hence, we will ignore the +$g(d_i)$ part in what follows. + +The maximum likelihood estimator maximizes the above likelihood +function. 
Given that $f(t) = h(t) \cdot S(t)$, the log-likelihood can +therefore be expressed as: + +$$ +\log L(\theta) = \sum_{i=1}^{n} \left[ d_i \cdot \log h(T_i) + \log S(T_i) \right] +$$ + +Since $H(t) = -\log S(t)$, the log-likelihood can also be expressed as: + +$$ +\log L(\theta) = \sum_{i=1}^{n} \left[ d_i \cdot \log h(T_i) - H(T_i) \right] +$$ + +### 2.5. Bayesian inference + +In a full Bayesian setting, the parameters are modeled directly using a +prior probability distribution, which is updated by the observed data +into a posterior distribution. The posterior distribution is the focus +of the inferential process. Therefore, when using a Bayesian framework, +the model must be completed by specifying suitable prior distributions +for the $\theta$ parameters. + +Within `jmpost`, all parameters have default priors assigned to them, +but users can explicitly specify the priors if they wish. The default +choice of prior distribution for the regression coefficients is +$N(\mu_\beta = 0, \sigma_\beta = 2)$, defined on the log scale. This +amounts to specifying a "minimally informative" prior on the regression +coefficients that determine the location parameter---in other words, we +are not including strong prior information in this aspect of our model. +The observed data (and the censoring structure) will primarily drive the +update to the posterior distribution. + +There are several choices of prior distributions for the model +parameters related to the baseline hazard (see [Table 1](#table1)). In +theory, any distribution defined in the $(0, +\infty)$ domain would be +viable as a prior distribution; however, the most common choices +include: + +- A half-normal, half-t, half-Cauchy, or exponential prior + distribution for the Weibull shape parameter. +- A half-normal, half-t, half-Cauchy, or exponential prior + distribution for the log-logistic scale parameter. + +### 2.6. 
Estimation + +The `jmpost` package is built on top of the `cmdstanr` R package (Stan +Development Team, 2019), the most advanced R interface for Stan. Stan, a +C++ library, provides a powerful platform for statistical modeling +(Carpenter et al., 2017) and uses the No-U-Turn Sampler (NUTS), a +specific implementation of Hamiltonian Monte Carlo (HMC) (Hoffman and +Gelman, 2014). Models in `jmpost` are written in the Stan programming +language, translated into C++ code, and compiled at runtime. + +Estimation in `jmpost` relies on full Bayesian inference. HMC, a form of +Markov chain Monte Carlo (MCMC), uses gradient information of the log +posterior to efficiently sample from the posterior space. + +Leveraging `cmdstanr`, `jmpost` allows users to control various aspects +of the estimation process, including the number of MCMC chains, the +number of warm-up and sampling iterations, and the number of computing +cores used. + +## 3. Implementation + +The design of `jmpost` is modular, with distinct sets of functions +tailored to specific objectives. These functions can be broadly +categorized into three main groups: **Data Preparation**, **Model +Fitting and Assessment**, and **Sensitivity Analysis and Predictions**, +as illustrated schematically in Figure 1. + +![Figure 1.](path/to/figure1.png) + +## 4. References + +### References + +- Baio, G. (2020). survHE: Survival analysis for health economic + evaluation and cost-effectiveness modeling. *Journal of Statistical + Software*, doi: + [10.18637/jss.v095.i14](https://doi.org/10.18637/jss.v095.i14). +- Carpenter, B., Gelman, A., Hoffman, M. D., et al. (2017). Stan: A + probabilistic programming language. *Journal of Statistical + Software*, doi: + [10.18637/jss.v076.i01](https://doi.org/10.18637/jss.v076.i01). +- Collett, D. (2003). *Modelling Survival Data in Medical Research*. + Chapman and Hall/CRC, Florida. +- Hoffman, M. D., & Gelman, A. (2014). 
The No-U-turn sampler: + Adaptively setting path lengths in Hamiltonian Monte Carlo. *Journal + of Machine Learning Research*, 15, 1593-1623. +- Hosmer, D. W., Lemeshow, S., & May, S. (2008). *Applied Survival + Analysis: Regression Modeling of Time-to-Event Data*. John Wiley & + Sons, New Jersey. +- Hsieh, F., Tseng, Y.-K., & Wang, J.-L. (2006). Joint modeling of + survival and longitudinal data: Likelihood approach revisited. + *Biometrics*, 62, 1037-1043. +- Hwang, W., & Pennell, M. L. (2014). Semiparametric Bayesian joint + modeling of a binary and continuous outcome with applications in + toxicological risk assessment. *Statistics in Medicine*, 33, + 1162-1175. +- Jackson, C. (2016). flexsurv: A platform for parametric survival + modeling in R. *Journal of Statistical Software*, doi: + [10.18637/jss.v070.i08](https://doi.org/10.18637/jss.v070.i08). +- Kalbfleisch, J. D., & Prentice, R. L. (2002). *The Statistical + Analysis of Failure Time Data*. John Wiley & Sons, New Jersey. +- Lawless, J. F. (2002). *Statistical Models and Methods for Lifetime + Data*. John Wiley & Sons, New Jersey. +- Rizopoulos, D. (2012). *Joint Models for Longitudinal and + Time-to-Event Data With Applications in R*. Chapman and Hall/CRC, + Florida. +- Wulfsohn, M. S., & Tsiatis, A. A. (1997). A joint model for survival + and longitudinal data measured with error. *Biometrics*, 53, + 330-339.