docs: 📝 content for session 2024-09-19 (#34)

* Draft 2024-09-19 * Update index.qmd * Add files via upload * Update index.qmd * Update index.qmd * rename to be slides instead * Apply suggestions from code review --------- Co-authored-by: Luke W. Johnston <[email protected]>
steno-aarhus · Sep 20, 2024 · 4973ac3 · 4973ac3
1 parent 0b0e6c4
commit 4973ac3
Show file tree

Hide file tree

Showing 4 changed files with 144 additions and 0 deletions.
diff --git a/sessions/2024-09-19/Caerphilly1.dta b/sessions/2024-09-19/Caerphilly1.dta
diff --git a/sessions/2024-09-19/index.qmd b/sessions/2024-09-19/index.qmd
@@ -0,0 +1,23 @@
+---
+title: "Fundamentals of time to event analysis"
+description: "Fundamentals of time to event analysis - Lexis-diagram, choice of time scale, as well as definition of key concepts like censoring, competing risks, Kaplan-Meier, cumulative incidence (with competing risks)"
+date: "2024-09-19"
+references:
+- type: article-journal
+  id: SchoberVetter2018
+  author:
+  - family:  Schober
+    given: Patrick
+  - family: Vetter
+    given: Thomas R 
+  issued: 2018
+  title: 'Survival Analysis and Interpretation of Time-to-Event Data: The Tortoise and the Hare'
+  DOI: 10.1213/ANE.0000000000003653
+---
+
+Files from session:
+
+- [Slides](slides.pdf)
+- [teaching_code](surv_example.R)
+
+The article discussed was [@SchoberVetter2018].
diff --git a/sessions/2024-09-19/slides.pdf b/sessions/2024-09-19/slides.pdf
diff --git a/sessions/2024-09-19/surv_example.R b/sessions/2024-09-19/surv_example.R
@@ -0,0 +1,121 @@
+library(haven)
+library(knitr)
+library(dplyr)
+library(survival)
+library(ggplot2)
+library(tibble)
+library(ggsurvfit)
+
+# some of this is inspired by:
+# https://www.emilyzabor.com/tutorials/survival_analysis_in_r_tutorial.html
+
+# Location of the Stata file; edit the path so it points at your own copy
+caerphilly_dat <- read_dta(
+  file = "D:/stovring/SDCA/EpiSpace_EpiStats/Survival_I/Caerphilly1.dta"
+)
+
+# Keep the columns from id through diabetes, plus smoking
+caerphilly_dat <- caerphilly_dat %>%
+  select(id:diabetes, smoking)
+
+# Add follow-up time in years, a death indicator and a current-smoker flag
+caerphilly_dat <- caerphilly_dat %>%
+  mutate(
+    # Time from examdate to the earliest of dthdate, emdate and eosdate.
+    # units = "days" is given explicitly: the default "auto" chooses the unit
+    # from the smallest difference in the data, so a single subject leaving
+    # on the examination date would silently return seconds instead of days.
+    os_dur = as.numeric(
+      difftime(
+        pmin(dthdate, emdate, eosdate, na.rm = TRUE), examdate,
+        units = "days"
+      )
+    ) / 365.25,
+    # 1 when a death date is recorded, 0 otherwise.
+    # NOTE(review): this assumes a recorded dthdate is never later than
+    # emdate/eosdate; otherwise the exit time above is not the death time.
+    status = ifelse(is.na(dthdate), 0, 1),
+    # smoking codes of 3 or more are treated as current smokers
+    cursmoker = ifelse(smoking >= 3, 1, 0)
+  )
+
+# Fit an overall (intercept-only) survival curve and keep the survfit object
+s1 <- survfit(
+  formula = Surv(os_dur, status) ~ 1,
+  data = caerphilly_dat
+)
+
+# Show the components stored in the fitted object
+str(s1)
+
+# Kaplan-Meier curve for the whole cohort; time scale is years in study.
+# The curve is drawn with a confidence band and a risk table.
+
+ggsurvfit(
+  survfit2(Surv(os_dur, status) ~ 1, data = caerphilly_dat)
+) +
+  labs(x = "Years", y = "Overall survival probability") +
+  add_confidence_interval() +
+  add_risktable()
+
+# Same plot, one curve per group:
+# current smokers (1) vs non/ex-smokers (0)
+ggsurvfit(
+  survfit2(Surv(os_dur, status) ~ cursmoker, data = caerphilly_dat)
+) +
+  labs(x = "Years", y = "Overall survival probability") +
+  add_confidence_interval() +
+  add_risktable()
+
+# Log-rank test comparing survival between the two smoking groups
+survdiff(
+  formula = Surv(os_dur, status) ~ cursmoker,
+  data = caerphilly_dat
+)
+
+
+# Age as time scale - creating entry and exit ages in years.
+# units = "days" is given explicitly so the conversion to years does not
+# depend on the data-driven unit that difftime() picks under "auto".
+caerphilly_dat <- caerphilly_dat %>%
+  mutate(
+    # Age at examination (entry into observation)
+    agein = as.numeric(
+      difftime(examdate, birthdate, units = "days")
+    ) / 365.25,
+    # Age at the earliest of dthdate, emdate and eosdate (exit)
+    ageout = as.numeric(
+      difftime(
+        pmin(dthdate, emdate, eosdate, na.rm = TRUE), birthdate,
+        units = "days"
+      )
+    ) / 365.25
+  )
+
+# Kaplan-Meier curve with age as the time scale: each subject contributes
+# from agein to ageout. Note that the x-axis is truncated to 45-85.
+ggsurvfit(
+  survfit2(Surv(agein, ageout, status) ~ 1, data = caerphilly_dat)
+) +
+  labs(x = "Years", y = "Overall survival probability") +
+  scale_x_continuous(
+    expand = c(0, 0),
+    limits = c(45, 85),
+    breaks = seq(from = 45, to = 85, by = 10)
+  ) +
+  scale_y_continuous(
+    expand = c(0, 0),
+    limits = c(0, 1),
+    breaks = seq(from = 0, to = 1, by = 0.2)
+  ) +
+  add_confidence_interval() +
+  add_risktable()
+
+# Age-scale Kaplan-Meier curves split by current smoking status;
+# the x-axis is limited to 45-80 here.
+ggsurvfit(
+  survfit2(Surv(agein, ageout, status) ~ cursmoker, data = caerphilly_dat)
+) +
+  labs(x = "Years", y = "Overall survival probability") +
+  scale_x_continuous(
+    expand = c(0, 0),
+    limits = c(45, 80),
+    breaks = seq(from = 45, to = 80, by = 10)
+  ) +
+  scale_y_continuous(
+    expand = c(0, 0),
+    limits = c(0, 1),
+    breaks = seq(from = 0, to = 1, by = 0.2)
+  ) +
+  add_confidence_interval() +
+  add_risktable()
+
+# Example with competing risk
+library(tidycmprsk)
+# Time in study as time scale - time to MI.
+# mistatus codes: 0 = censored, 1 = MI, 2 = death without a recorded MI.
+caerphilly_dat <- caerphilly_dat %>%
+  mutate(
+    # Years from examdate to the earliest of dthdate, midate, emdate, eosdate.
+    # units = "days" is given explicitly: the default "auto" chooses the unit
+    # from the smallest difference in the data, so one event on the
+    # examination date would silently return seconds instead of days.
+    midur = as.numeric(
+      difftime(
+        pmin(dthdate, midate, emdate, eosdate, na.rm = TRUE), examdate,
+        units = "days"
+      )
+    ) / 365.25,
+    # MI indicator plus twice the death-without-MI indicator gives 0/1/2;
+    # stored as a factor for the cuminc() call below
+    mistatus = as.factor(
+      ifelse(!is.na(midate), 1, 0) +
+        2 * ifelse(!is.na(dthdate) & is.na(midate), 1, 0)
+    ),
+    # Numeric MI indicator: 1 for MI, 0 for everything else (deaths included)
+    mistatus_num = as.numeric(mistatus == "1")
+  )
+
+# Cumulative incidence of MI (outcome "1"), with death as a competing event
+ggcuminc(
+  cuminc(Surv(midur, mistatus) ~ 1, data = caerphilly_dat),
+  outcome = "1"
+) +
+  ylim(0, 0.3) +
+  labs(x = "Years")
+
+# Incorrect - death used as censoring.
+# type = "risk" plots 1 - Kaplan-Meier, so the curve is the (biased)
+# cumulative incidence named on the y-axis and can be compared with the
+# cuminc() plot; the default type would draw MI-free survival instead.
+survfit2(Surv(midur, mistatus_num) ~ 1, data = caerphilly_dat) %>%
+  ggsurvfit(type = "risk") +
+  labs(
+    x = "Years",
+    y = "MI cumulative incidence (censored for death)"
+  ) +
+  add_confidence_interval() +
+  add_risktable()
+