In this session, you will learn to:
\begin{itemize}
\item Construct and interpret ROC curves.
\item Calculate the Area Under the Curve (AUC).
\item Analyze the impact of data imbalance and decision thresholds on model performance.
\item Collaborate with peers to compare results and discuss findings.
\end{itemize}

Below is a table with the true labels (\texttt{y}) and the predicted probabilities of four different classifiers ($\hat\pi_1, \hat\pi_2, \hat\pi_3, \hat\pi_4$), generated from hypothetical models.

<<echo=FALSE, results='asis'>>=
library(knitr)  # provides kable() for LaTeX table output
# Define the actual labels and predicted probabilities
y <- c(1, 1, 1, 1, 0, 0, 0)
# Predicted probabilities for different models
pi_1 <- c(0.99, 0.6, 0.95, 0.7, 0.8, 0.1, 0.3)
pi_2 <- c(0.1, 0.05, 0.07, 0.15, 0.01, 0.08, 0.02)
pi_3 <- c(0.01, 0.4, 0.05, 0.3, 0.2, 0.9, 0.7)
pi_4 <- c(0.7, 0.9, 0.2, 0.8, 0.5, 0.1, 0.3)
# Specify column names with LaTeX math mode
col_names <- c("$y$", "$\\hat\\pi_1$", "$\\hat\\pi_2$", "$\\hat\\pi_3$", "$\\hat\\pi_4$")
# Output the data set as a LaTeX table
kable(
  data.frame(y, pi_1, pi_2, pi_3, pi_4),
  format = "latex",
  escape = FALSE,
  col.names = col_names
)
@
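
To make the construction concrete: each point on a ROC curve comes from fixing a threshold $c$, predicting positive whenever $\hat\pi \ge c$, and computing the true positive rate (TPR) and false positive rate (FPR). The chunk below is a minimal sketch of one such point for $\hat{\pi}_1$, reusing the \texttt{y} and \texttt{pi\_1} vectors defined above; the threshold of 0.5 is an arbitrary choice for illustration.

<<echo=TRUE>>=
# One ROC point for classifier 1 at an arbitrary threshold of 0.5:
# predict "positive" whenever the score reaches the threshold.
threshold <- 0.5
pred <- as.integer(pi_1 >= threshold)
tpr <- sum(pred == 1 & y == 1) / sum(y == 1)  # TPR = TP / (TP + FN)
fpr <- sum(pred == 1 & y == 0) / sum(y == 0)  # FPR = FP / (FP + TN)
c(TPR = tpr, FPR = fpr)
@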

\textbf{Tasks}

\begin{itemize}
\item Step 1:
Watch as the instructor demonstrates how to plot the ROC curve using $\hat{\pi}_1$ and explains the steps.

\item Step 2: Form groups of 4--6 people and
\begin{itemize}
\item Complete the ROC curve for $\hat{\pi}_1$.
\item Plot the ROC curves for $\hat{\pi}_2$, $\hat{\pi}_3$, and $\hat{\pi}_4$.
\item Manually calculate the AUC for each classifier and compare the results (a pair-counting sketch follows this list).
\end{itemize}

\item Step 3: Within your group, discuss:
\begin{itemize}
\item How the differences in predictions affect the ROC curves and AUC values.
\item Key takeaways from comparing classifiers.
\end{itemize}

\item Step 4: Formulate 1--2 challenging TRUE-FALSE questions about ROC curves. Nominate a group leader to present one question to the class and explain its relevance.
\end{itemize}
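
One fact that simplifies the manual AUC calculation in Step 2: on a finite sample, the AUC equals the fraction of (positive, negative) pairs in which the positive example receives the higher score, with ties counting one half. The chunk below sketches this pair-counting view; \texttt{auc\_pairs} is a hypothetical helper written for this sheet, applied to the \texttt{pi\_1} scores from the table chunk above.

<<echo=TRUE>>=
# AUC as the probability that a randomly drawn positive outscores a
# randomly drawn negative, estimated by counting pairs (ties count 1/2).
auc_pairs <- function(scores, labels, positive = 1) {
  pos <- scores[labels == positive]
  neg <- scores[labels != positive]
  mean(outer(pos, neg, function(p, n) (p > n) + 0.5 * (p == n)))
}
auc_pairs(pi_1, y)  # should match the AUC read off the plotted ROC curve
@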
\newpage
\section{Classifier 1}
<<fig.height=2.25, echo=FALSE, warning=FALSE, message=FALSE>>=
source("../../../slides/evaluation/rsrc/plot_roc.R")

# One data frame per classifier: example index, true class, predicted score
truth <- c("Pos", "Pos", "Pos", "Pos", "Neg", "Neg", "Neg")
df1 <- data.frame("#" = 1:7, Truth = truth,
                  Score = c(0.99, 0.6, 0.95, 0.7, 0.8, 0.1, 0.3),
                  check.names = FALSE)
df2 <- data.frame("#" = 1:7, Truth = truth,
                  Score = c(0.1, 0.05, 0.07, 0.15, 0.01, 0.08, 0.02),
                  check.names = FALSE)
df3 <- data.frame("#" = 1:7, Truth = truth,
                  Score = c(0.01, 0.4, 0.05, 0.3, 0.2, 0.9, 0.7),
                  check.names = FALSE)
df4 <- data.frame("#" = 1:7, Truth = truth,
                  Score = c(0.7, 0.9, 0.2, 0.8, 0.5, 0.1, 0.3),
                  check.names = FALSE)

# Draw one ROC snapshot per threshold, sweeping the threshold from above
# the largest score down to below the smallest one (midpoints between
# consecutive sorted scores, padded with 1 and 0 at the ends).
stepROC <- function(df) {
  sorted <- c(1, sort(df$Score, decreasing = TRUE), 0)
  df <- df[order(df$Score, decreasing = TRUE), ]
  for (i in 1:(nrow(df) + 1)) {
    plotROC(df, threshold = (sorted[i] + sorted[i + 1]) / 2)
  }
}
stepROC(df1)

# AUC for classifier 1, computed with mlr3
library(mlr3verse)
auc <- msr("classif.auc")
auc$fun(as.factor(df1$Truth), df1$Score, positive = "Pos")
@
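
As a quick sanity check, the pair-counting helper sketched before the tasks should reproduce the mlr3 value above (this chunk assumes the hypothetical \texttt{auc\_pairs} chunk was run earlier in the document).

<<echo=TRUE>>=
# Cross-check the mlr3 AUC with the hypothetical pair-counting helper
auc_pairs(df1$Score, df1$Truth, positive = "Pos")
@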

\newpage
\section{Classifier 2}
<<fig.height=2.25, echo=FALSE, warning=FALSE, message=FALSE>>=
stepROC(df2)
auc$fun(as.factor(df2$Truth), df2$Score, positive = "Pos")
@

\newpage
\section{Classifier 3}
<<fig.height=2.25, echo=FALSE, warning=FALSE, message=FALSE>>=
stepROC(df3)
auc$fun(as.factor(df3$Truth), df3$Score, positive = "Pos")
@

\newpage
\section{Classifier 4}
<<fig.height=2.25, echo=FALSE, warning=FALSE, message=FALSE>>=
stepROC(df4)
auc$fun(as.factor(df4$Truth), df4$Score, positive = "Pos")
@