From fe77044bbf349ccc7813799a54b7995d8cb2168a Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 26 May 2023 13:45:51 +0200 Subject: [PATCH 01/10] plot method for data_tabulate --- NAMESPACE | 2 ++ R/plot.dw_data_tabulate.R | 28 ++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 R/plot.dw_data_tabulate.R diff --git a/NAMESPACE b/NAMESPACE index 236899110..39d449fdb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,8 @@ S3method(data_plot,performance_pp_check) S3method(data_plot,point_estimate) S3method(data_plot,rope) S3method(data_plot,see_compare_parameters) +S3method(plot,dw_data_tabulate) +S3method(plot,dw_data_tabulates) S3method(plot,see_bayesfactor_models) S3method(plot,see_bayesfactor_parameters) S3method(plot,see_bayesfactor_savagedickey) diff --git a/R/plot.dw_data_tabulate.R b/R/plot.dw_data_tabulate.R new file mode 100644 index 000000000..412834aba --- /dev/null +++ b/R/plot.dw_data_tabulate.R @@ -0,0 +1,28 @@ +#' @export + +plot.dw_data_tabulates <- function(x, value_lab = TRUE) { + lapply(x, function(dat) { + plot.dw_data_tabulate(dat, value_lab = value_lab) + }) +} + +#' @export + +plot.dw_data_tabulate <- function(x, value_lab = TRUE) { + + dat <- as.data.frame(x) + dat$label <- paste0(dat$N, "\n(", round(dat$`Raw %`, 2), "%)") + + out <- ggplot(dat, aes(x = Value, y = N)) + + geom_col() + + labs(title = unique(dat$Variable)) + + if (isTRUE(value_lab)) { + out <- out + + geom_text(aes(label = label), vjust = -0.5) + + ylim(c(0, max(dat$N) * 1.1)) + } + + out +} + From f6fdf1d6c37cebf2b101f43c597f9ba3efbe1bcf Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Tue, 30 May 2023 13:57:28 +0200 Subject: [PATCH 02/10] add args for na label and to remove na, add modern theme --- R/plot.dw_data_tabulate.R | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git 
a/R/plot.dw_data_tabulate.R b/R/plot.dw_data_tabulate.R index 412834aba..16b75f0b6 100644 --- a/R/plot.dw_data_tabulate.R +++ b/R/plot.dw_data_tabulate.R @@ -1,26 +1,43 @@ #' @export -plot.dw_data_tabulates <- function(x, value_lab = TRUE) { +plot.dw_data_tabulates <- function(x, value_lab = TRUE, remove_na = FALSE, + na_label = "(Missing)") { lapply(x, function(dat) { - plot.dw_data_tabulate(dat, value_lab = value_lab) + plot.dw_data_tabulate(dat, value_lab = value_lab, remove_na = remove_na, + na_label = na_label) }) } #' @export -plot.dw_data_tabulate <- function(x, value_lab = TRUE) { +plot.dw_data_tabulate <- function(x, value_lab = TRUE, remove_na = FALSE, + na_label = "(Missing)") { dat <- as.data.frame(x) - dat$label <- paste0(dat$N, "\n(", round(dat$`Raw %`, 2), "%)") + + if (isTRUE(remove_na)) { + dat <- dat[!is.na(dat$Value), ] + dat$output <- dat$`Valid %` + } else { + dat$output <- dat$`Raw %` + + # deal with factors + dat$Value <- as.character(dat$Value) + dat$Value[is.na(dat$Value)] <- na_label + dat$Value <- factor(dat$Value, levels = c(setdiff(dat$Value, na_label), na_label)) + } + + dat$label <- paste0(dat$N, "\n(", round(dat$output, 2), "%)") out <- ggplot(dat, aes(x = Value, y = N)) + geom_col() + - labs(title = unique(dat$Variable)) + labs(title = unique(dat$Variable)) + + theme_modern() if (isTRUE(value_lab)) { out <- out + geom_text(aes(label = label), vjust = -0.5) + - ylim(c(0, max(dat$N) * 1.1)) + ylim(c(0, max(dat$N) * 1.2)) } out From 8f64c618f7851e6aac8fbf3648c83f167c3f31d9 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 2 Jun 2023 14:05:16 +0200 Subject: [PATCH 03/10] try adding error bars --- R/plot.dw_data_tabulate.R | 48 ++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/R/plot.dw_data_tabulate.R b/R/plot.dw_data_tabulate.R index 16b75f0b6..9026bb984 100644 --- a/R/plot.dw_data_tabulate.R +++ b/R/plot.dw_data_tabulate.R @@ 
-1,33 +1,47 @@ #' @export plot.dw_data_tabulates <- function(x, value_lab = TRUE, remove_na = FALSE, - na_label = "(Missing)") { + na_label = "(Missing)", error_bar = TRUE) { lapply(x, function(dat) { plot.dw_data_tabulate(dat, value_lab = value_lab, remove_na = remove_na, - na_label = na_label) + na_label = na_label, error_bar = error_bar) }) } #' @export plot.dw_data_tabulate <- function(x, value_lab = TRUE, remove_na = FALSE, - na_label = "(Missing)") { + na_label = "(Missing)", error_bar = TRUE) { dat <- as.data.frame(x) if (isTRUE(remove_na)) { dat <- dat[!is.na(dat$Value), ] - dat$output <- dat$`Valid %` + dat$output <- dat[[which(startsWith(names(dat), "Valid"))]] } else { - dat$output <- dat$`Raw %` + dat$output <- dat[[which(startsWith(names(dat), "Raw"))]] - # deal with factors + # deal with missing values dat$Value <- as.character(dat$Value) dat$Value[is.na(dat$Value)] <- na_label - dat$Value <- factor(dat$Value, levels = c(setdiff(dat$Value, na_label), na_label)) + dat$Value <- factor( + dat$Value, levels = c(setdiff(dat$Value, na_label), na_label) + ) } - dat$label <- paste0(dat$N, "\n(", round(dat$output, 2), "%)") + if (isTRUE(error_bar)) { + total_n <- sum(dat$N) + rel_frq <- dat$output/100 + ci <- 1.96 * suppressWarnings(sqrt(rel_frq * (1 - rel_frq) / total_n)) + dat$upper.ci <- total_n * (rel_frq + ci) + dat$lower.ci <- total_n * (rel_frq - ci) + } + + if (isTRUE(error_bar)) { + dat$label <- paste0(dat$N, " (", round(dat$output, 2), "%)") + } else { + dat$label <- paste0(dat$N, "\n(", round(dat$output, 2), "%)") + } out <- ggplot(dat, aes(x = Value, y = N)) + geom_col() + @@ -35,9 +49,21 @@ plot.dw_data_tabulate <- function(x, value_lab = TRUE, remove_na = FALSE, theme_modern() if (isTRUE(value_lab)) { - out <- out + - geom_text(aes(label = label), vjust = -0.5) + - ylim(c(0, max(dat$N) * 1.2)) + if (isTRUE(error_bar)) { + out <- out + + geom_text(aes(label = label), vjust = -1, hjust = 1.2) + + ylim(c(0, max(dat$N) * 1.5)) + } else { + out <- out 
+ + geom_text(aes(label = label), vjust = -0.5) + + ylim(c(0, max(dat$N) * 1.2)) + } + } + + # add confidence intervals for frequencies + if (isTRUE(error_bar)) { + out <- out + + geom_errorbar(aes(ymin = lower.ci, ymax = upper.ci), width = 0.5, color = "darkblue") } out From 4b340640b43eb65aa4021f125af5b05e933d7179 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Tue, 6 Jun 2023 13:18:21 +0200 Subject: [PATCH 04/10] add doc [skip ci] --- R/plot.dw_data_tabulate.R | 13 ++++++++++++ man/plot.dw_data_tabulate.Rd | 39 ++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 man/plot.dw_data_tabulate.Rd diff --git a/R/plot.dw_data_tabulate.R b/R/plot.dw_data_tabulate.R index 9026bb984..3e71bfd0a 100644 --- a/R/plot.dw_data_tabulate.R +++ b/R/plot.dw_data_tabulate.R @@ -1,3 +1,14 @@ +#' Plot tabulated data. +#' +#' @param x Object created by `datawizard::data_tabulate()`. +#' @param value_lab Boolean. Should values and percentages be displayed at the +#' top of each bar. +#' @param remove_na Boolean. Should missing values be dropped? +#' @param na_label The label given to missing values (only when +#' `remove_na = FALSE`). +#' @param error_bar Boolean. Should error bars be displayed? 
+#' +#' @rdname plot.dw_data_tabulate #' @export plot.dw_data_tabulates <- function(x, value_lab = TRUE, remove_na = FALSE, @@ -8,6 +19,8 @@ plot.dw_data_tabulates <- function(x, value_lab = TRUE, remove_na = FALSE, }) } +#' @rdname plot.dw_data_tabulate +#' #' @export plot.dw_data_tabulate <- function(x, value_lab = TRUE, remove_na = FALSE, diff --git a/man/plot.dw_data_tabulate.Rd b/man/plot.dw_data_tabulate.Rd new file mode 100644 index 000000000..78f460450 --- /dev/null +++ b/man/plot.dw_data_tabulate.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot.dw_data_tabulate.R +\name{plot.dw_data_tabulates} +\alias{plot.dw_data_tabulates} +\alias{plot.dw_data_tabulate} +\title{Plot tabulated data.} +\usage{ +\method{plot}{dw_data_tabulates}( + x, + value_lab = TRUE, + remove_na = FALSE, + na_label = "(Missing)", + error_bar = TRUE +) + +\method{plot}{dw_data_tabulate}( + x, + value_lab = TRUE, + remove_na = FALSE, + na_label = "(Missing)", + error_bar = TRUE +) +} +\arguments{ +\item{x}{Object created by \code{datawizard::data_tabulate()}.} + +\item{value_lab}{Boolean. Should values and percentages be displayed at the +top of each bar.} + +\item{remove_na}{Boolean. Should missing values be dropped?} + +\item{na_label}{The label given to missing values (only when +\code{remove_na = FALSE}).} + +\item{error_bar}{Boolean. Should error bars be displayed?} +} +\description{ +Plot tabulated data. 
+} From 7d8d19468d763eceb816cfe7299b1a74136cf6d5 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 25 Aug 2023 08:38:13 +0200 Subject: [PATCH 05/10] lintr --- R/plot.dw_data_tabulate.R | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/R/plot.dw_data_tabulate.R b/R/plot.dw_data_tabulate.R index 3e71bfd0a..607e884c3 100644 --- a/R/plot.dw_data_tabulate.R +++ b/R/plot.dw_data_tabulate.R @@ -13,10 +13,10 @@ plot.dw_data_tabulates <- function(x, value_lab = TRUE, remove_na = FALSE, na_label = "(Missing)", error_bar = TRUE) { - lapply(x, function(dat) { - plot.dw_data_tabulate(dat, value_lab = value_lab, remove_na = remove_na, - na_label = na_label, error_bar = error_bar) - }) + lapply(x, plot.dw_data_tabulate, + value_lab = value_lab, remove_na = remove_na, + na_label = na_label, error_bar = error_bar + ) } #' @rdname plot.dw_data_tabulate @@ -25,7 +25,6 @@ plot.dw_data_tabulates <- function(x, value_lab = TRUE, remove_na = FALSE, plot.dw_data_tabulate <- function(x, value_lab = TRUE, remove_na = FALSE, na_label = "(Missing)", error_bar = TRUE) { - dat <- as.data.frame(x) if (isTRUE(remove_na)) { @@ -38,13 +37,14 @@ plot.dw_data_tabulate <- function(x, value_lab = TRUE, remove_na = FALSE, dat$Value <- as.character(dat$Value) dat$Value[is.na(dat$Value)] <- na_label dat$Value <- factor( - dat$Value, levels = c(setdiff(dat$Value, na_label), na_label) + dat$Value, + levels = c(setdiff(dat$Value, na_label), na_label) ) } if (isTRUE(error_bar)) { total_n <- sum(dat$N) - rel_frq <- dat$output/100 + rel_frq <- dat$output / 100 ci <- 1.96 * suppressWarnings(sqrt(rel_frq * (1 - rel_frq) / total_n)) dat$upper.ci <- total_n * (rel_frq + ci) dat$lower.ci <- total_n * (rel_frq - ci) @@ -81,4 +81,3 @@ plot.dw_data_tabulate <- function(x, value_lab = TRUE, remove_na = FALSE, out } - From c05e347b0e0ed454fb8f24b74388c2c3683b47da Mon Sep 17 00:00:00 2001 From: Etienne Bacher 
<52219252+etiennebacher@users.noreply.github.com> Date: Fri, 25 Aug 2023 08:43:41 +0200 Subject: [PATCH 06/10] fix warnings --- R/plot.dw_data_tabulate.R | 18 ++++++++++++------ man/plot.dw_data_tabulate.Rd | 8 ++++++-- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/R/plot.dw_data_tabulate.R b/R/plot.dw_data_tabulate.R index 607e884c3..861d93201 100644 --- a/R/plot.dw_data_tabulate.R +++ b/R/plot.dw_data_tabulate.R @@ -7,12 +7,14 @@ #' @param na_label The label given to missing values (only when #' `remove_na = FALSE`). #' @param error_bar Boolean. Should error bars be displayed? +#' @param ... Unused #' #' @rdname plot.dw_data_tabulate #' @export plot.dw_data_tabulates <- function(x, value_lab = TRUE, remove_na = FALSE, - na_label = "(Missing)", error_bar = TRUE) { + na_label = "(Missing)", error_bar = TRUE, + ...) { lapply(x, plot.dw_data_tabulate, value_lab = value_lab, remove_na = remove_na, na_label = na_label, error_bar = error_bar @@ -24,7 +26,8 @@ plot.dw_data_tabulates <- function(x, value_lab = TRUE, remove_na = FALSE, #' @export plot.dw_data_tabulate <- function(x, value_lab = TRUE, remove_na = FALSE, - na_label = "(Missing)", error_bar = TRUE) { + na_label = "(Missing)", error_bar = TRUE, + ...) 
{ dat <- as.data.frame(x) if (isTRUE(remove_na)) { @@ -56,7 +59,7 @@ plot.dw_data_tabulate <- function(x, value_lab = TRUE, remove_na = FALSE, dat$label <- paste0(dat$N, "\n(", round(dat$output, 2), "%)") } - out <- ggplot(dat, aes(x = Value, y = N)) + + out <- ggplot(dat, aes(x = .data$Value, y = .data$N)) + geom_col() + labs(title = unique(dat$Variable)) + theme_modern() @@ -64,11 +67,11 @@ plot.dw_data_tabulate <- function(x, value_lab = TRUE, remove_na = FALSE, if (isTRUE(value_lab)) { if (isTRUE(error_bar)) { out <- out + - geom_text(aes(label = label), vjust = -1, hjust = 1.2) + + geom_text(aes(label = .data$label), vjust = -1, hjust = 1.2) + ylim(c(0, max(dat$N) * 1.5)) } else { out <- out + - geom_text(aes(label = label), vjust = -0.5) + + geom_text(aes(label = .data$label), vjust = -0.5) + ylim(c(0, max(dat$N) * 1.2)) } } @@ -76,7 +79,10 @@ plot.dw_data_tabulate <- function(x, value_lab = TRUE, remove_na = FALSE, # add confidence intervals for frequencies if (isTRUE(error_bar)) { out <- out + - geom_errorbar(aes(ymin = lower.ci, ymax = upper.ci), width = 0.5, color = "darkblue") + geom_errorbar( + aes(ymin = .data$lower.ci, ymax = .data$upper.ci), + width = 0.5, color = "darkblue" + ) } out diff --git a/man/plot.dw_data_tabulate.Rd b/man/plot.dw_data_tabulate.Rd index 78f460450..3b6eb8925 100644 --- a/man/plot.dw_data_tabulate.Rd +++ b/man/plot.dw_data_tabulate.Rd @@ -10,7 +10,8 @@ value_lab = TRUE, remove_na = FALSE, na_label = "(Missing)", - error_bar = TRUE + error_bar = TRUE, + ... ) \method{plot}{dw_data_tabulate}( @@ -18,7 +19,8 @@ value_lab = TRUE, remove_na = FALSE, na_label = "(Missing)", - error_bar = TRUE + error_bar = TRUE, + ... ) } \arguments{ @@ -33,6 +35,8 @@ top of each bar.} \code{remove_na = FALSE}).} \item{error_bar}{Boolean. Should error bars be displayed?} + +\item{...}{Unused} } \description{ Plot tabulated data. 
From 66abea2e158e3176ec56d7683327e6fc31afccd0 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 25 Aug 2023 08:55:39 +0200 Subject: [PATCH 07/10] add some tests --- R/plot.dw_data_tabulate.R | 18 +++++++++++------- tests/testthat/test-plot.dw_data_tabulate.R | 9 +++++++++ 2 files changed, 20 insertions(+), 7 deletions(-) create mode 100644 tests/testthat/test-plot.dw_data_tabulate.R diff --git a/R/plot.dw_data_tabulate.R b/R/plot.dw_data_tabulate.R index 861d93201..0cc498bb3 100644 --- a/R/plot.dw_data_tabulate.R +++ b/R/plot.dw_data_tabulate.R @@ -15,10 +15,17 @@ plot.dw_data_tabulates <- function(x, value_lab = TRUE, remove_na = FALSE, na_label = "(Missing)", error_bar = TRUE, ...) { - lapply(x, plot.dw_data_tabulate, - value_lab = value_lab, remove_na = remove_na, - na_label = na_label, error_bar = error_bar - ) + if (length(x) == 1) { + plot.dw_data_tabulate( + x[[1]], value_lab = value_lab, remove_na = remove_na, + na_label = na_label, error_bar = error_bar + ) + } else { + lapply(x, plot.dw_data_tabulate, + value_lab = value_lab, remove_na = remove_na, + na_label = na_label, error_bar = error_bar + ) + } } #' @rdname plot.dw_data_tabulate @@ -51,9 +58,6 @@ plot.dw_data_tabulate <- function(x, value_lab = TRUE, remove_na = FALSE, ci <- 1.96 * suppressWarnings(sqrt(rel_frq * (1 - rel_frq) / total_n)) dat$upper.ci <- total_n * (rel_frq + ci) dat$lower.ci <- total_n * (rel_frq - ci) - } - - if (isTRUE(error_bar)) { dat$label <- paste0(dat$N, " (", round(dat$output, 2), "%)") } else { dat$label <- paste0(dat$N, "\n(", round(dat$output, 2), "%)") diff --git a/tests/testthat/test-plot.dw_data_tabulate.R b/tests/testthat/test-plot.dw_data_tabulate.R new file mode 100644 index 000000000..40dd660a7 --- /dev/null +++ b/tests/testthat/test-plot.dw_data_tabulate.R @@ -0,0 +1,9 @@ +test_that("`plot.dw_data_tabulate()` works with single table", { + x <- datawizard::data_tabulate(mtcars, select = "cyl") + 
expect_s3_class(plot(x), "gg") +}) + +test_that("`plot.dw_data_tabulate()` works with multiple tables", { + x <- datawizard::data_tabulate(mtcars, select = c("cyl", "carb", "am")) + expect_true(is.list(plot(x))) +}) From 8fbb0dbb419274697193dcceceb5e9412f346aef Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 25 Aug 2023 08:58:49 +0200 Subject: [PATCH 08/10] bump news and description --- DESCRIPTION | 2 +- NEWS.md | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 388b3f9a4..349e7da6b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: see Title: Model Visualisation Toolbox for 'easystats' and 'ggplot2' -Version: 0.8.0.2 +Version: 0.8.0.3 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index 51fd504e4..78a1afe06 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # see (development version) +## New features + +* There is now a `plot()` method for outputs of `datawizard::data_tabulate()` + (#293). + ## Minor Changes * The `print()` method for `performance::check_model()` now also evaluates the From e04947a325d05c2c19d25078f240059f6ae7556c Mon Sep 17 00:00:00 2001 From: "Brenton M. Wiernik" Date: Thu, 31 Aug 2023 09:37:19 -0400 Subject: [PATCH 09/10] tweaks 1. change `remove_na` to `show_na` and set default to conditionally show the missing category if any NA are present 2. use the Wilson confidence interval rather than the normal approximation to avoid CIs that escape the [0, 1] bounds 3. namespace ggplot2 functions 4. 
add arguments for the column and error bar colors, defaulting to blue hues for each --- R/plot.dw_data_tabulate.R | 104 +++++++++++++++++++++++++---------- man/plot.dw_data_tabulate.Rd | 39 ++++++++++--- 2 files changed, 104 insertions(+), 39 deletions(-) diff --git a/R/plot.dw_data_tabulate.R b/R/plot.dw_data_tabulate.R index 0cc498bb3..df70e044e 100644 --- a/R/plot.dw_data_tabulate.R +++ b/R/plot.dw_data_tabulate.R @@ -1,29 +1,50 @@ #' Plot tabulated data. #' #' @param x Object created by `datawizard::data_tabulate()`. -#' @param value_lab Boolean. Should values and percentages be displayed at the -#' top of each bar. -#' @param remove_na Boolean. Should missing values be dropped? -#' @param na_label The label given to missing values (only when -#' `remove_na = FALSE`). -#' @param error_bar Boolean. Should error bars be displayed? +#' @param label_values Logical. Should values and percentages be displayed at the +#' top of each bar? +#' @param show_na Should the missing-value category be shown? Can be `"if_any"` (default) to show +#' the missing category only if any missing values are present, `"always"` to +#' always show the missing category, or `"never"` to never show the missing +#' category. +#' @param na_label The label given to missing values when they are shown. +#' @param error_bar Logical. Should error bars be displayed? +#' If `TRUE`, confidence intervals computed using the Wilson method are shown. +#' See Brown et al. (2001) for details. +#' @param ci Confidence Interval (CI) level. Defaults to `0.95` (95%). +#' @param fill_col Color to use for category columns (default: "#87CEFA"). +#' @param color_error_bar Color to use for error bars (default: "#607B8B"). #' @param ... Unused #' +#' @references +#' Brown, L. D., Cai, T. T., & DasGupta, A. (2001). +#' Interval estimation for a binomial proportion. +#' _Statistical Science, 16_(2), 101–133. 
\doi{10.1214/ss/1009213286} +#' #' @rdname plot.dw_data_tabulate #' @export -plot.dw_data_tabulates <- function(x, value_lab = TRUE, remove_na = FALSE, - na_label = "(Missing)", error_bar = TRUE, +plot.dw_data_tabulates <- function(x, label_values = TRUE, + show_na = c("if_any", "always", "never"), + na_label = "(Missing)", + error_bar = TRUE, ci = .95, + fill_col = "#87CEFA", + color_error_bar = "#607B8B", ...) { + show_na <- match.arg(show_na, choices = c("if_any", "always", "never")) if (length(x) == 1) { plot.dw_data_tabulate( - x[[1]], value_lab = value_lab, remove_na = remove_na, - na_label = na_label, error_bar = error_bar + x[[1]], label_values = label_values, + show_na = show_na, na_label = na_label, + error_bar = error_bar, ci = ci, + fill_col = fill_col, color_error_bar = color_error_bar ) } else { lapply(x, plot.dw_data_tabulate, - value_lab = value_lab, remove_na = remove_na, - na_label = na_label, error_bar = error_bar + label_values = label_values, + show_na = show_na, na_label = na_label, + error_bar = error_bar, ci = ci, + fill_col = fill_col, color_error_bar = color_error_bar ) } } @@ -32,12 +53,25 @@ plot.dw_data_tabulates <- function(x, value_lab = TRUE, remove_na = FALSE, #' #' @export -plot.dw_data_tabulate <- function(x, value_lab = TRUE, remove_na = FALSE, - na_label = "(Missing)", error_bar = TRUE, +plot.dw_data_tabulate <- function(x, label_values = TRUE, + show_na = c("if_any", "always", "never"), + na_label = "(Missing)", + error_bar = TRUE, ci = .95, + fill_col = "#87CEFA", + color_error_bar = "#607B8B", ...) 
{ + show_na <- match.arg(show_na, choices = c("if_any", "always", "never")) dat <- as.data.frame(x) - if (isTRUE(remove_na)) { + if (show_na == "if_any") { + if (any(is.na(dat$Value))) { + show_na <- ifelse(dat[is.na(dat$Value), "N"] > 0, "always", "never") + } else { + show_na <- "never" + } + } + + if (show_na == "never") { dat <- dat[!is.na(dat$Value), ] dat$output <- dat[[which(startsWith(names(dat), "Valid"))]] } else { @@ -54,40 +88,50 @@ plot.dw_data_tabulate <- function(x, value_lab = TRUE, remove_na = FALSE, if (isTRUE(error_bar)) { total_n <- sum(dat$N) - rel_frq <- dat$output / 100 - ci <- 1.96 * suppressWarnings(sqrt(rel_frq * (1 - rel_frq) / total_n)) - dat$upper.ci <- total_n * (rel_frq + ci) - dat$lower.ci <- total_n * (rel_frq - ci) + props <- dat$output / 100 + dat <- cbind(dat, CI = ci, .wilson_ci(prop = props, total_n = total_n, ci = ci) * total_n) dat$label <- paste0(dat$N, " (", round(dat$output, 2), "%)") } else { dat$label <- paste0(dat$N, "\n(", round(dat$output, 2), "%)") } - out <- ggplot(dat, aes(x = .data$Value, y = .data$N)) + - geom_col() + - labs(title = unique(dat$Variable)) + + out <- ggplot2::ggplot(dat) + + ggplot2::aes(x = .data$Value, y = .data$N) + + ggplot2::geom_col(fill = fill_col) + + ggplot2::labs(title = unique(dat$Variable)) + theme_modern() - if (isTRUE(value_lab)) { + if (isTRUE(label_values)) { if (isTRUE(error_bar)) { out <- out + - geom_text(aes(label = .data$label), vjust = -1, hjust = 1.2) + - ylim(c(0, max(dat$N) * 1.5)) + ggplot2::geom_text(ggplot2::aes(label = .data$label), vjust = -1, hjust = 1.2) + + ggplot2::coord_cartesian(ylim = c(0, max(dat$CI_high))) } else { out <- out + - geom_text(aes(label = .data$label), vjust = -0.5) + - ylim(c(0, max(dat$N) * 1.2)) + ggplot2::geom_text(ggplot2::aes(label = .data$label), vjust = -0.5) + + ggplot2::coord_cartesian(ylim = c(0, max(dat$N) * 1.2)) } } # add confidence intervals for frequencies if (isTRUE(error_bar)) { out <- out + - geom_errorbar( - aes(ymin = 
.data$lower.ci, ymax = .data$upper.ci), - width = 0.5, color = "darkblue" + ggplot2::geom_linerange( + ggplot2::aes(ymin = .data$CI_low, ymax = .data$CI_high), + color = color_error_bar ) } out } + +.wilson_ci <- function(prop, total_n, ci = .95) { + z <- qnorm((1 - ci) / 2, lower.tail = FALSE) + z2 <- z^2 + p1 <- prop + 0.5 * z2 / total_n + p2 <- z * sqrt((prop * (1 - prop) + 0.25 * z2 / total_n) / total_n) + p3 <- 1 + z2 / total_n + CI_low <- (p1 - p2) / p3 + CI_high <- (p1 + p2) / p3 + return(data.frame(CI_low = CI_low, CI_high = CI_high)) +} diff --git a/man/plot.dw_data_tabulate.Rd b/man/plot.dw_data_tabulate.Rd index 3b6eb8925..21d22fd2e 100644 --- a/man/plot.dw_data_tabulate.Rd +++ b/man/plot.dw_data_tabulate.Rd @@ -7,37 +7,58 @@ \usage{ \method{plot}{dw_data_tabulates}( x, - value_lab = TRUE, - remove_na = FALSE, + label_values = TRUE, + show_na = c("if_any", "always", "never"), na_label = "(Missing)", error_bar = TRUE, + ci = 0.95, + fill_col = "#87CEFA", + color_error_bar = "#607B8B", ... ) \method{plot}{dw_data_tabulate}( x, - value_lab = TRUE, - remove_na = FALSE, + label_values = TRUE, + show_na = c("if_any", "always", "never"), na_label = "(Missing)", error_bar = TRUE, + ci = 0.95, + fill_col = "#87CEFA", + color_error_bar = "#607B8B", ... ) } \arguments{ \item{x}{Object created by \code{datawizard::data_tabulate()}.} -\item{value_lab}{Boolean. Should values and percentages be displayed at the +\item{label_values}{Logical. Should values and percentages be displayed at the top of each bar.} -\item{remove_na}{Boolean. Should missing values be dropped?} +\item{show_na}{Should missing values be dropped? 
Can be \code{"if_any"} (default) to show +the missing category only if any missing values are present, \code{"always"} to +always show the missing category, or \code{"never"} to never show the missing +category.} -\item{na_label}{The label given to missing values (only when -\code{remove_na = FALSE}).} +\item{na_label}{The label given to missing values when they are shown.} -\item{error_bar}{Boolean. Should error bars be displayed?} +\item{error_bar}{Logical. Should error bars be displayed? +If \code{TRUE}, confidence intervals computed using the Wilson method are shown. +See Brown et al. (2001) for details.} + +\item{ci}{Confidence Interval (CI) level. Default to \code{0.95} (⁠95\%⁠).} + +\item{fill_col}{Color to use for category columns (default: "#87CEFA").} + +\item{color_error_bar}{Color to use for error bars (default: "#607B8B").} \item{...}{Unused} } \description{ Plot tabulated data. } +\references{ +Brown, L. D., Cai, T. T., & DasGupta, A. (2001). +Interval estimation for a binomial proportion. +\emph{Statistical Science, 16}(2), 101–133. \doi{10.1214/ss/1009213286} +} From 1079e2b2cded2eebeded4fa69dc0bd5bfddbcdb3 Mon Sep 17 00:00:00 2001 From: "Brenton M. Wiernik" Date: Thu, 31 Aug 2023 10:13:46 -0400 Subject: [PATCH 10/10] namespace qnorm --- R/plot.dw_data_tabulate.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/plot.dw_data_tabulate.R b/R/plot.dw_data_tabulate.R index df70e044e..9533c16e7 100644 --- a/R/plot.dw_data_tabulate.R +++ b/R/plot.dw_data_tabulate.R @@ -126,7 +126,7 @@ plot.dw_data_tabulate <- function(x, label_values = TRUE, } .wilson_ci <- function(prop, total_n, ci = .95) { - z <- qnorm((1 - ci) / 2, lower.tail = FALSE) + z <- stats::qnorm((1 - ci) / 2, lower.tail = FALSE) z2 <- z^2 p1 <- prop + 0.5 * z2 / total_n p2 <- z * sqrt((prop * (1 - prop) + 0.25 * z2 / total_n) / total_n)