Skip to content

Commit

Permalink
add alternative ROC in-class task
Browse files Browse the repository at this point in the history
  • Loading branch information
giuseppec committed Dec 4, 2024
1 parent bef9851 commit 03ba665
Showing 1 changed file with 149 additions and 0 deletions.
149 changes: 149 additions & 0 deletions exercises/evaluation-tex/ex_rnw/ex_roc-curve_2.Rnw
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
In this session, you will learn to:
\begin{itemize}
\item Construct and interpret ROC curves.
\item Calculate the Area Under the Curve (AUC).
\item Analyze the impact of data imbalance and decision thresholds on model performance.
\item Collaborate with peers to compare results and discuss findings.
\end{itemize}


Below is a table that consists of the true labels (\texttt{y}) and the predicted probabilities of four different classifiers ($\hat\pi_1, \hat\pi_2, \hat\pi_3, \hat\pi_4$) generated from hypothetical models.

<<echo=FALSE, results='asis'>>=
# True class labels of the seven observations (the later chunks list the
# same rows as four "Pos" followed by three "Neg")
y <- c(1, 1, 1, 1, 0, 0, 0)
# Predicted probabilities, one vector per hypothetical classifier
pi_1 <- c(0.99, 0.6, 0.95, 0.7, 0.8, 0.1, 0.3)
pi_2 <- c(0.1, 0.05, 0.07, 0.15, 0.01, 0.08, 0.02)
pi_3 <- c(0.01, 0.4, 0.05, 0.3, 0.2, 0.9, 0.7)
pi_4 <- c(0.7, 0.9, 0.2, 0.8, 0.5, 0.1, 0.3)
# Column headers are raw LaTeX math: "$y$" followed by one "$\hat\pi_k$"
# per classifier
col_names <- c("$y$", paste0("$\\hat\\pi_", 1:4, "$"))
# Emit the labels and scores side by side as a LaTeX table; escaping is
# switched off so the math-mode headers reach LaTeX unchanged
kable(
  data.frame(y, pi_1, pi_2, pi_3, pi_4),
  format = "latex",
  escape = FALSE,
  col.names = col_names
)
@



\textbf{Tasks}

\begin{itemize}
\item Step 1:
Watch as the instructor demonstrates how to plot the ROC curve using $\hat{\pi}_1$ and explains the steps.

\item Step 2: Form groups of 4-6 people and
\begin{itemize}
\item Complete the ROC curve for $\hat{\pi}_1$.
\item Plot the ROC curves for $\hat{\pi}_2$, $\hat{\pi}_3$, and $\hat{\pi}_4$.
\item Manually calculate the AUC for each classifier and compare the results.
\end{itemize}

\item Step 3: Within your group, discuss:
\begin{itemize}
\item How the differences in predictions affect the ROC curves and AUC values.
\item Key takeaways from comparing classifiers.
\end{itemize}

\item Step 4: Formulate 1-2 challenging TRUE-FALSE questions about ROC curves. Nominate a group leader to present one question to the class and explain its relevance.
\end{itemize}

%
% \begin{itemize}
% \item The instructor will use $\pi_1$ to explain how to start with plotting a ROC curve.
% \item Form groups of 4-6 people and complete the ROC curve for $\hat\pi_1$ and plot the ROC curves for the other classifiers $\hat\pi_2$, $\hat\pi_3$, $\hat\pi_4$.
% \item For each classifier, calculate the AUC manually based on the plotted ROC curves and compare the results.
% \item In your group, discuss the differences of the classifier predictions and their impact on the curve.
% \item Formulate 1-2 short TRUE-FALSE questions around the topic of ROC curves within your group (e.g., based on the insights you gained or more general from the lecture slides on ROC curves).
% \item Nominate a group leader within your group who will present the TRUE-FALSE question your group came up with.
% \end{itemize}

\newpage
\section{Classifier 1}
<<fig.height=2.25, echo = FALSE, warning=FALSE, message=FALSE>>=
source("../../../slides/evaluation/rsrc/plot_roc.R")
# Build one data frame per classifier. All four share the same seven
# observations (four "Pos" followed by three "Neg"); only the scores differ.
make_score_df <- function(score) {
  # check.names = FALSE keeps the literal "#" column name, so no renaming
  # step is needed after construction.
  data.frame(
    "#" = 1:7,
    Truth = rep(c("Pos", "Neg"), times = c(4, 3)),
    Score = score,
    check.names = FALSE
  )
}
# Classifier 1
df1 <- make_score_df(c(0.99, 0.6, 0.95, 0.7, 0.8, 0.1, 0.3))
# Classifier 2
df2 <- make_score_df(c(0.1, 0.05, 0.07, 0.15, 0.01, 0.08, 0.02))
# Classifier 3
df3 <- make_score_df(c(0.01, 0.4, 0.05, 0.3, 0.2, 0.9, 0.7))
# Classifier 4
df4 <- make_score_df(c(0.7, 0.9, 0.2, 0.8, 0.5, 0.1, 0.3))
# Print the data frames
# df1
# df2
# df3
# df4
# Call plotROC() once per decision threshold, sweeping from the strictest
# threshold to the most lenient one.
#
# df: data frame with a numeric `Score` column; it is handed to plotROC()
#     with its rows ordered by decreasing score.
#
# The thresholds are the midpoints between neighbouring values of
# c(1, <scores sorted decreasingly>, 0), i.e. nrow(df) + 1 thresholds. The
# sentinels 1 and 0 assume the scores lie in [0, 1].
# The value of each plotROC() call is discarded and the function returns
# NULL invisibly.
# NOTE(review): plotROC() is presumably drawing as a side effect - confirm
# against its definition.
stepROC = function(df) {
  # Ordering the rows does not depend on the threshold, so do it once.
  ordered = df[order(df$Score, decreasing = TRUE), ]
  bounds = c(1, sort(df$Score, decreasing = TRUE), 0)
  # Midpoint of every pair of adjacent bounds.
  thresholds = (head(bounds, -1) + tail(bounds, -1)) / 2
  for (threshold in thresholds) {
    plotROC(ordered, threshold = threshold)
  }
}
# Step through the ROC thresholds for classifier 1 (one plotROC() call each).
stepROC(df1)
# msr() below is presumably made available by attaching mlr3verse.
# NOTE(review): the library() call sits mid-chunk; consider moving it to the
# top of the chunk with the other setup code.
library(mlr3verse)
# Look up the AUC measure once; `auc` is reused by the chunks for
# classifiers 2-4.
auc = msr("classif.auc")
# Call the measure's underlying function on the true labels and the scores,
# with "Pos" as the positive class.
auc$fun(as.factor(df1$Truth), df1$Score, positive = "Pos")
@

\newpage
\section{Classifier 2}
<<fig.height=2.25, echo = FALSE, warning=FALSE, message=FALSE>>=
# Step through the ROC thresholds for classifier 2, then compute its AUC
# with "Pos" as the positive class (`auc` is defined in the classifier 1 chunk).
stepROC(df2)
auc$fun(as.factor(df2$Truth), df2$Score, positive = "Pos")
@

\newpage
\section{Classifier 3}
<<fig.height=2.25, echo = FALSE, warning=FALSE, message=FALSE>>=
# Step through the ROC thresholds for classifier 3, then compute its AUC
# with "Pos" as the positive class (`auc` is defined in the classifier 1 chunk).
stepROC(df3)
auc$fun(as.factor(df3$Truth), df3$Score, positive = "Pos")
@

\newpage
\section{Classifier 4}
<<fig.height=2.25, echo = FALSE, warning=FALSE, message=FALSE>>=
# Step through the ROC thresholds for classifier 4, then compute its AUC
# with "Pos" as the positive class (`auc` is defined in the classifier 1 chunk).
stepROC(df4)
auc$fun(as.factor(df4$Truth), df4$Score, positive = "Pos")
@

0 comments on commit 03ba665

Please sign in to comment.