In this session, you will learn to:
\begin{itemize}
\item Construct and interpret ROC curves.
\item Calculate the Area Under the Curve (AUC).
\item Analyze the impact of data imbalance and decision thresholds on model performance.
\item Collaborate with peers to compare results and discuss findings.
\end{itemize}

Below is a table with the true labels (\texttt{y}) and the predicted probabilities of four different classifiers ($\hat\pi_1, \hat\pi_2, \hat\pi_3, \hat\pi_4$), generated from hypothetical models.

<<echo=FALSE, results='asis'>>=
library(knitr)  # provides kable() for LaTeX table output
# Define the actual labels and predicted probabilities
y <- c(1, 1, 1, 1, 0, 0, 0)
# Predicted probabilities for different models
pi_1 <- c(0.99, 0.6, 0.95, 0.7, 0.8, 0.1, 0.3)
pi_2 <- c(0.1, 0.05, 0.07, 0.15, 0.01, 0.08, 0.02)
pi_3 <- c(0.01, 0.4, 0.05, 0.3, 0.2, 0.9, 0.7)
pi_4 <- c(0.7, 0.9, 0.2, 0.8, 0.5, 0.1, 0.3)
# Specify column names with LaTeX math mode
col_names <- c("$y$", "$\\hat\\pi_1$", "$\\hat\\pi_2$", "$\\hat\\pi_3$", "$\\hat\\pi_4$")
# Output the data set as a LaTeX table
kable(
  data.frame(y, pi_1, pi_2, pi_3, pi_4),
  format = "latex",
  escape = FALSE,
  col.names = col_names
)
@
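
To make the construction concrete: each point on a ROC curve comes from fixing a threshold $c$, predicting positive whenever $\hat\pi \ge c$, and computing the true positive rate (TPR) and false positive rate (FPR). The chunk below is a minimal sketch of one such point for $\hat{\pi}_1$, reusing the \texttt{y} and \texttt{pi\_1} vectors defined above; the threshold of 0.5 is an arbitrary choice for illustration.

<<echo=TRUE>>=
# One ROC point for classifier 1 at an arbitrary threshold of 0.5:
# predict "positive" whenever the score reaches the threshold.
threshold <- 0.5
pred <- as.integer(pi_1 >= threshold)
tpr <- sum(pred == 1 & y == 1) / sum(y == 1)  # TPR = TP / (TP + FN)
fpr <- sum(pred == 1 & y == 0) / sum(y == 0)  # FPR = FP / (FP + TN)
c(TPR = tpr, FPR = fpr)
@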

\textbf{Tasks}

\begin{itemize}
\item Step 1:
Watch as the instructor demonstrates how to plot the ROC curve using $\hat{\pi}_1$ and explains the steps.

\item Step 2: Form groups of 4--6 people and
\begin{itemize}
\item Complete the ROC curve for $\hat{\pi}_1$.
\item Plot the ROC curves for $\hat{\pi}_2$, $\hat{\pi}_3$, and $\hat{\pi}_4$.
\item Manually calculate the AUC for each classifier and compare the results (a pair-counting sketch follows this list).
\end{itemize}

\item Step 3: Within your group, discuss:
\begin{itemize}
\item How the differences in predictions affect the ROC curves and AUC values.
\item Key takeaways from comparing classifiers.
\end{itemize}

\item Step 4: Formulate 1--2 challenging TRUE-FALSE questions about ROC curves. Nominate a group leader to present one question to the class and explain its relevance.
\end{itemize}
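
One fact that simplifies the manual AUC calculation in Step 2: on a finite sample, the AUC equals the fraction of (positive, negative) pairs in which the positive example receives the higher score, with ties counting one half. The chunk below sketches this pair-counting view; \texttt{auc\_pairs} is a hypothetical helper written for this sheet, applied to the \texttt{pi\_1} scores from the table chunk above.

<<echo=TRUE>>=
# AUC as the probability that a randomly drawn positive outscores a
# randomly drawn negative, estimated by counting pairs (ties count 1/2).
auc_pairs <- function(scores, labels, positive = 1) {
  pos <- scores[labels == positive]
  neg <- scores[labels != positive]
  mean(outer(pos, neg, function(p, n) (p > n) + 0.5 * (p == n)))
}
auc_pairs(pi_1, y)  # should match the AUC read off the plotted ROC curve
@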
\newpage
\section{Classifier 1}
<<fig.height=2.25, echo=FALSE, warning=FALSE, message=FALSE>>=
source("../../../slides/evaluation/rsrc/plot_roc.R")

# One data frame per classifier: example index, true class, predicted score
truth <- c("Pos", "Pos", "Pos", "Pos", "Neg", "Neg", "Neg")
df1 <- data.frame("#" = 1:7, Truth = truth,
                  Score = c(0.99, 0.6, 0.95, 0.7, 0.8, 0.1, 0.3),
                  check.names = FALSE)
df2 <- data.frame("#" = 1:7, Truth = truth,
                  Score = c(0.1, 0.05, 0.07, 0.15, 0.01, 0.08, 0.02),
                  check.names = FALSE)
df3 <- data.frame("#" = 1:7, Truth = truth,
                  Score = c(0.01, 0.4, 0.05, 0.3, 0.2, 0.9, 0.7),
                  check.names = FALSE)
df4 <- data.frame("#" = 1:7, Truth = truth,
                  Score = c(0.7, 0.9, 0.2, 0.8, 0.5, 0.1, 0.3),
                  check.names = FALSE)

# Draw one ROC snapshot per threshold, sweeping the threshold from above
# the largest score down to below the smallest one (midpoints between
# consecutive sorted scores, padded with 1 and 0 at the ends).
stepROC <- function(df) {
  sorted <- c(1, sort(df$Score, decreasing = TRUE), 0)
  df <- df[order(df$Score, decreasing = TRUE), ]
  for (i in 1:(nrow(df) + 1)) {
    plotROC(df, threshold = (sorted[i] + sorted[i + 1]) / 2)
  }
}
stepROC(df1)

# AUC for classifier 1, computed with mlr3
library(mlr3verse)
auc <- msr("classif.auc")
auc$fun(as.factor(df1$Truth), df1$Score, positive = "Pos")
@
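
As a quick sanity check, the pair-counting helper sketched before the tasks should reproduce the mlr3 value above (this chunk assumes the hypothetical \texttt{auc\_pairs} chunk was run earlier in the document).

<<echo=TRUE>>=
# Cross-check the mlr3 AUC with the hypothetical pair-counting helper
auc_pairs(df1$Score, df1$Truth, positive = "Pos")
@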

\newpage
\section{Classifier 2}
<<fig.height=2.25, echo=FALSE, warning=FALSE, message=FALSE>>=
stepROC(df2)
auc$fun(as.factor(df2$Truth), df2$Score, positive = "Pos")
@

\newpage
\section{Classifier 3}
<<fig.height=2.25, echo=FALSE, warning=FALSE, message=FALSE>>=
stepROC(df3)
auc$fun(as.factor(df3$Truth), df3$Score, positive = "Pos")
@

\newpage
\section{Classifier 4}
<<fig.height=2.25, echo=FALSE, warning=FALSE, message=FALSE>>=
stepROC(df4)
auc$fun(as.factor(df4$Truth), df4$Score, positive = "Pos")
@