From b4e5c8ef08d66ce9ceba322cdac55436eba599bf Mon Sep 17 00:00:00 2001
From: "Brenton M. Wiernik" <bwiernik@users.noreply.github.com>
Date: Thu, 14 Jul 2022 10:28:18 -0400
Subject: [PATCH 1/5] Draft diagnostics vignette

---
 vignettes/diagnostics.Rmd | 285 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 285 insertions(+)
 create mode 100644 vignettes/diagnostics.Rmd

diff --git a/vignettes/diagnostics.Rmd b/vignettes/diagnostics.Rmd
new file mode 100644
index 000000000..120d7a976
--- /dev/null
+++ b/vignettes/diagnostics.Rmd
@@ -0,0 +1,285 @@
+---
+title: "Graphical model diagnostics"
+output: 
+  rmarkdown::html_vignette:
+    toc: true
+    fig_width: 10.08
+    fig_height: 6
+tags: [r, regression, modeling, diagnostics, pp_check, check, assumptions]
+vignette: >
+  \usepackage[utf8]{inputenc}
+  %\VignetteIndexEntry{Graphical model diagnostics}
+  %\VignetteEngine{knitr::rmarkdown}
+editor_options: 
+  chunk_output_type: console
+bibliography: bibliography.bib
+csl: apa.csl
+---
+
+This vignette can be referred to by citing the package:
+
+- citation
+- citation
+
+---
+
+```{r message=FALSE, warning=FALSE, include=FALSE}
+# Skip evaluation of all later chunks unless every package they use is installed.
+pkgs <- c(
+  "see", "performance", "ggplot2", "qqplotr", "bayestestR",
+  "logspline", "KernSmooth", "bayesplot", "datawizard"
+)
+if (!all(vapply(pkgs, requireNamespace, logical(1), quietly = TRUE))) {
+  knitr::opts_chunk$set(eval = FALSE)
+}
+
+library(knitr)
+knitr::opts_chunk$set(comment = ">")
+options(knitr.kable.NA = "", digits = 2)
+
+set.seed(333) # fixed seed so the simulations below are reproducible
+```
+
+# Model diagnostics
+
+A critical step in statistical modeling is *model diagnostics*—
+checks made to evaluate model assumptions and ensure that predictions and inferences made based on a model are reliable. 
+Model diagnostics can include global evaluations of the fit of a model to observed data,
+as well as checks of specific model assumptions, such as linearity, variance homogeneity, or normality of residuals.
+
+A variety of methods for model diagnostics are available. 
+One useful family of model diagnostic methods is **graphical methods**.
+Graphical methods visualize information about the model and allow modelers to quickly check a variety of model assumptions using visual inspection (the "eyeball test"). 
+
+Another approach to model diagnostics is **statistical hypothesis tests**,
+such as the Levene test for homogeneity of variance or the Shapiro test for normality.
+These tests are often problematic—they are frequently highly sensitive to sample size (being either overpowered or underpowered),
+and they provide little information about the nature, size, or importance of assumption violations.
+For example, the Shapiro test for normality of residuals frequently has *p* < .05 even when deviations from normality are minor and have no impact on the validity of model inferences or predictions.
+Compared to statistical hypothesis checks of model assumptions, 
+graphical methods are often more robust to sample size and 
+more informative about the size, nature, and impact of assumption violations.
+With graphical methods, modelers can better assess whether assumption violations are major and must be addressed,
+or minor and can safely be ignored.
+
+In this vignette, we present a variety of diagnostic tools provided in the *performance* package,
+with an emphasis on graphical methods and diagnostic plots. 
+We discuss assumptions made by different types of models, 
+how aspects and assumptions of models can be visualized,
+and how to use diagnostic plots to determine if a model is performing well or poorly.
+The vignette is organized by type of statistical model, 
+as different types of models make different assumptions 
+and may use different types of diagnostic plots.
+
+# Linear models
+
+Linear models include linear regression and many common statistical tests,
+such as *t*-tests, correlations, ANOVA, ANCOVA, and *χ*^2^ tests.
+
+Linear models assume a *normal likelihood*, which means that they assume that residuals on the response variable (*y*) are normally distributed around theur predicted values after accounting for any predictor variables (*x*).
+
+Linear models make the following assumptions:
+
+1. Validity
+
+2. Linearity 
+
+3. Homogeneity of residual variance
+
+4. Normality of residuals
+
+In addition to these assumptions, depending on the purpose of the model, 
+we might also be concerned with additional potential sources of problems with our model.
+
+5. Endogeneity
+
+6. Influential observations and outliers
+
+7. Collinearity
+
+Many of these assumptions can be checked using the `check_model()` function.
+We will demonstrate this function using the `mtcars` dataset.
+This dataset includes features about 32 different car models,
+as well as their fuel economy (miles per gallon).
+
+```{r message=FALSE, warning=FALSE, echo=FALSE, fig.cap="Correlation between the frequentist p-value and the probability of direction (pd)", fig.align='center'}
+library(performance)
+
+# Copy of the built-in `mtcars` data, the example data set named in the text above.
+# NOTE(review): no plot is drawn here, so `fig.cap` looks like a template leftover — confirm.
+dat <- mtcars
+```
+
+ 
+
+
+```{r message=FALSE, warning=FALSE, echo=FALSE, fig.cap="Correlation between the frequentist p-value and the probability of direction (pd)", fig.align='center'}
+library(ggplot2)
+library(see)
+
+raw <- read.csv("https://raw.github.com/easystats/easystats/master/publications/makowski_2019_bayesian/data/data.csv")
+dat <- transform(
+  raw,
+  effect_existence = ifelse(true_effect == 1, "Presence of true effect", "Absence of true effect"),
+  p_direction = p_direction * 100
+)
+# Threshold guides use annotate(): constants inside aes() are redrawn once per data row.
+ggplot(dat, aes(x = p_direction, y = p_value, color = effect_existence)) +
+  geom_point2(alpha = 0.1) +
+  annotate("segment", x = 95, y = Inf, xend = 95, yend = 0.1, color = "black", linetype = "longdash") +
+  annotate("segment", x = -Inf, y = 0.1, xend = 95, yend = 0.1, color = "black", linetype = "longdash") +
+  annotate("segment", x = 97.5, y = Inf, xend = 97.5, yend = 0.05, color = "black", linetype = "dashed") +
+  annotate("segment", x = -Inf, y = 0.05, xend = 97.5, yend = 0.05, color = "black", linetype = "dashed") +
+  theme_modern() +
+  scale_y_reverse(breaks = c(0.05, round(seq(0, 1, length.out = 11), digits = 2))) +
+  scale_x_continuous(breaks = c(95, 97.5, round(seq(50, 100, length.out = 6)))) +
+  scale_color_manual(values = c("Presence of true effect" = "green", "Absence of true effect" = "red")) +
+  theme(legend.title = element_blank()) +
+  guides(colour = guide_legend(override.aes = list(alpha = 1))) +
+  labs(x = "Probability of Direction (pd)", y = "Frequentist p-value")
+```
+
+
+
+> **But if it's like the *p*-value, it must be bad because the *p*-value is bad [*insert reference to the reproducibility crisis*].**
+
+In fact, this aspect of the reproducibility crisis might have been
+misunderstood. Indeed, it is not that the *p*-value is intrinsically bad or
+wrong. Instead, it is its **misuse**, **misunderstanding** and
+**misinterpretation** that fuels the decay of the situation. For instance, the
+fact that the **pd** is highly correlated with the *p*-value suggests that the
+latter is more an index of effect *existence* than *significance* (*i.e.*,
+"worth of interest"). The Bayesian version, the **pd**, has an intuitive meaning
+and makes obvious the fact that **all thresholds are arbitrary**. Additionally,
+the **mathematical and interpretative transparency** of the **pd**, and its
+reconceptualisation as an index of effect existence, offers a valuable insight
+into the characterization of Bayesian results. Moreover, its concomitant
+proximity with the frequentist *p*-value makes it a perfect metric to ease the
+transition of psychological research into the adoption of the Bayesian
+framework.
+
+# Methods of computation
+
+The most **simple and direct** way to compute the **pd** is to 1) look at the
+median's sign, 2) select the portion of the posterior of the same sign and 3)
+compute the percentage that this portion represents. This "simple" method is the
+most straightforward, but its precision is directly tied to the number of
+posterior draws.
+
+The second approach relies on [**density estimation**](https://easystats.github.io/bayestestR/reference/estimate_density.html).
+It starts by estimating the density function (for which many methods are
+available), and then computing the [**area under the curve**](https://easystats.github.io/bayestestR/reference/area_under_curve.html)
+(AUC) of the density curve on the other side of 0. The density-based method
+could hypothetically be considered as more precise, but strongly depends on the
+method used to estimate the density function.
+
+# Methods comparison
+
+Let's compare the 4 available methods, the **direct** method and 3
+**density-based** methods differing by their density estimation algorithm (see
+[`estimate_density`](https://easystats.github.io/bayestestR/reference/estimate_density.html)).
+
+## Correlation
+
+Let's start by testing the proximity and similarity of the results obtained by different methods.
+
+```{r message=FALSE, warning=FALSE, fig.align='center'}
+library(bayestestR)
+library(logspline)
+library(KernSmooth)
+
+# Compute the pd with each method on simulated samples. Rows are stored in a
+# preallocated list and bound once, rather than growing a data frame in the loop.
+results <- vector("list", 25 * 25)
+k <- 0
+for (the_mean in runif(25, 0, 4)) {
+  for (the_sd in runif(25, 0.5, 4)) {
+    x <- rnorm(100, the_mean, abs(the_sd))
+    k <- k + 1
+    results[[k]] <- data.frame(
+      "direct" = pd(x),
+      "kernel" = pd(x, method = "kernel"),
+      "logspline" = pd(x, method = "logspline"),
+      "KernSmooth" = pd(x, method = "KernSmooth")
+    )
+  }
+}
+data <- as.data.frame(lapply(do.call(rbind, results), as.numeric))
+
+# Visualize the correlations
+bayesplot::mcmc_pairs(data) +
+  theme_classic()
+```
+
+All methods are highly correlated and give very similar results. That means
+that the method choice is not a drastic game changer and cannot be used to tweak
+the results too much.
+
+## Accuracy
+
+To test the accuracy of each method, we will start by computing the **direct
+*pd*** from a very dense distribution (with a large amount of observations).
+This will be our baseline, or "true" *pd*. Then, we will iteratively draw
+smaller samples from this parent distribution, and we will compute the *pd* with
+different methods. The closer this estimate is to the reference one, the
+better.
+
+```{r message=FALSE, warning=FALSE}
+# Collect one row per subsample in a preallocated list and bind the rows once,
+# rather than growing a data frame with rbind() inside the loop.
+results <- vector("list", 25 * 25)
+for (i in 1:25) {
+  the_mean <- runif(1, 0, 4)
+  the_sd <- abs(runif(1, 0.5, 4))
+  parent_distribution <- rnorm(100000, the_mean, the_sd)
+  true_pd <- pd(parent_distribution)
+
+  for (j in 1:25) {
+    sample_size <- round(runif(1, 25, 5000))
+    subsample <- sample(parent_distribution, sample_size)
+    # Each method's estimate is stored as its deviation from the reference pd
+    results[[(i - 1) * 25 + j]] <- data.frame(
+      "sample_size" = sample_size,
+      "true" = true_pd,
+      "direct" = pd(subsample) - true_pd,
+      "kernel" = pd(subsample, method = "kernel") - true_pd,
+      "logspline" = pd(subsample, method = "logspline") - true_pd,
+      "KernSmooth" = pd(subsample, method = "KernSmooth") - true_pd
+    )
+  }
+}
+data <- as.data.frame(lapply(do.call(rbind, results), as.numeric))
+```
+
+```{r message=FALSE, warning=FALSE, fig.align='center'}
+library(datawizard) # for reshape_longer
+data %>%
+  reshape_longer(cols = 3:6, colnames_to = "Method", values_to = "Distance") %>%
+  ggplot(aes(x = sample_size, y = Distance, color = Method, fill = Method)) +
+  geom_point(alpha = 0.3, stroke = 0, shape = 16) +
+  geom_smooth(alpha = 0.2) +
+  geom_hline(yintercept = 0) + # zero = estimate equals the reference pd
+  theme_classic() +
+  xlab("\nDistribution Size") # leading "\n" starts the label with a line break
+```
+
+The "Kernel" based density methods seem to consistently underestimate the *pd*. Interestingly, the "direct" method appears to be the most reliable, even in the case of a small number of posterior draws.
+
+
+## Can the pd be 100\%?
+
+`p = 0.000` is coined as one of the terms to avoid when reporting results
+[@lilienfeld2015fifty], even if often displayed by statistical software. The
+rationale is that for every probability distribution, there is no value with a
+probability of exactly 0. There is always some infinitesimal probability
+associated with each data point, and the `p = 0.000` returned by software is due
+to approximations related, among others, to finite memory hardware.
+
+One could apply this rationale for the *pd*: since all data points have a
+non-null probability density, then the *pd* (a particular portion of the
+probability density) can *never* be 100\%. While this is an entirely valid
+point, people using the *direct* method might argue that their *pd* is based on
+the posterior draws, rather than on the theoretical, hidden, true posterior
+distribution (which is only approximated by the posterior draws). These
+posterior draws represent a finite sample for which `pd = 100%` is a valid
+statement.

From 99bd8dfe8d247c16ba4a20dc43c56f9df3660082 Mon Sep 17 00:00:00 2001
From: "Brenton M. Wiernik" <bwiernik@users.noreply.github.com>
Date: Mon, 10 Oct 2022 14:00:48 -0400
Subject: [PATCH 2/5] typo

---
 vignettes/diagnostics.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/diagnostics.Rmd b/vignettes/diagnostics.Rmd
index 120d7a976..2f1dd7a46 100644
--- a/vignettes/diagnostics.Rmd
+++ b/vignettes/diagnostics.Rmd
@@ -76,7 +76,7 @@ and may use different types of diagnostic plots.
 Linear models include linear regression and many common statistical tests,
 such as *t*-tests, correlations, ANOVA, ANCOVA, and *χ*^2^ tests.
 
-Linear models assume a *normal likelihood*, which means that they assume that residuals on the response variable (*y*) are normally distributed around theur predicted values after accounting for any predictor variables (*x*).
+Linear models assume a *normal likelihood*, which means that they assume that residuals on the response variable (*y*) are normally distributed around their predicted values after accounting for any predictor variables (*x*).
 
 Linear models make the following assumptions:
 

From bda22048e15a12b0d05a2725b8be383bb6be9959 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sun, 9 Apr 2023 08:46:16 +0200
Subject: [PATCH 3/5] use native pipe

---
 vignettes/diagnostics.Rmd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vignettes/diagnostics.Rmd b/vignettes/diagnostics.Rmd
index 2f1dd7a46..c5541d1af 100644
--- a/vignettes/diagnostics.Rmd
+++ b/vignettes/diagnostics.Rmd
@@ -253,8 +253,8 @@ data <- as.data.frame(sapply(data, as.numeric))
 
 ```{r message=FALSE, warning=FALSE, fig.align='center'}
 library(datawizard) # for reshape_longer
-data %>%
-  reshape_longer(cols = 3:6, colnames_to = "Method", values_to = "Distance") %>%
+data |>
+  reshape_longer(cols = 3:6, colnames_to = "Method", values_to = "Distance") |>
   ggplot(aes(x = sample_size, y = Distance, color = Method, fill = Method)) +
   geom_point(alpha = 0.3, stroke = 0, shape = 16) +
   geom_smooth(alpha = 0.2) +

From 88680731d93ac99f4dbaf6cde4bc2adb63867533 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sun, 9 Apr 2023 08:49:41 +0200
Subject: [PATCH 4/5] update argument names

---
 vignettes/diagnostics.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/diagnostics.Rmd b/vignettes/diagnostics.Rmd
index c5541d1af..d894483e6 100644
--- a/vignettes/diagnostics.Rmd
+++ b/vignettes/diagnostics.Rmd
@@ -254,7 +254,7 @@ data <- as.data.frame(sapply(data, as.numeric))
 ```{r message=FALSE, warning=FALSE, fig.align='center'}
 library(datawizard) # for reshape_longer
 data |>
-  reshape_longer(cols = 3:6, colnames_to = "Method", values_to = "Distance") |>
+  reshape_longer(cols = 3:6, names_to = "Method", values_to = "Distance") |>
   ggplot(aes(x = sample_size, y = Distance, color = Method, fill = Method)) +
   geom_point(alpha = 0.3, stroke = 0, shape = 16) +
   geom_smooth(alpha = 0.2) +

From 39d158f2d46dfa96a7b391d6bc5c9752b1740742 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sun, 9 Apr 2023 11:04:40 +0200
Subject: [PATCH 5/5] vignette renders now

---
 vignettes/diagnostics.Rmd | 2 --
 1 file changed, 2 deletions(-)

diff --git a/vignettes/diagnostics.Rmd b/vignettes/diagnostics.Rmd
index d894483e6..8288bf123 100644
--- a/vignettes/diagnostics.Rmd
+++ b/vignettes/diagnostics.Rmd
@@ -12,8 +12,6 @@ vignette: >
   %\VignetteEngine{knitr::rmarkdown}
 editor_options: 
   chunk_output_type: console
-bibliography: bibliography.bib
-csl: apa.csl
 ---
 
 This vignette can be referred to by citing the package: