Skip to content

Commit

Permalink
Merge pull request #6443 from tidyverse/warnings-collection
Browse files Browse the repository at this point in the history
Collect warnings and add `dplyr_last_warnings()`
  • Loading branch information
lionel- authored Sep 21, 2022
2 parents e3efb3c + 104bddf commit 4abc428
Show file tree
Hide file tree
Showing 12 changed files with 803 additions and 54 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ BugReports: https://github.com/tidyverse/dplyr/issues
Depends:
R (>= 3.4.0)
Imports:
cli (>= 3.4.0),
generics,
glue (>= 1.3.2),
lifecycle (>= 1.0.2),
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ S3method(print,any_vars)
S3method(print,dplyr_join_by)
S3method(print,dplyr_sel_vars)
S3method(print,fun_list)
S3method(print,last_dplyr_warnings)
S3method(print,src)
S3method(pull,data.frame)
S3method(rbind,grouped_df)
Expand Down Expand Up @@ -334,6 +335,7 @@ export(join_by)
export(lag)
export(last)
export(last_col)
export(last_dplyr_warnings)
export(lead)
export(left_join)
export(location)
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# dplyr (development version)

* Warnings emitted inside `mutate()` and variants are now collected and stashed
away. Run the new `last_dplyr_warnings()` function to see the warnings emitted
within dplyr verbs during the last top-level command.

This fixes performance issues when thousands of warnings are emitted with
rowwise and grouped data frames (#6005, #6236).

* `group_by_prepare()` loses the `caller_env` argument. It was rarely used
and it is no longer needed (#6444).

Expand Down
170 changes: 164 additions & 6 deletions R/conditions.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,7 @@ quo_as_label <- function(quo) {
}

local_error_context <- function(dots, .index, mask, frame = caller_env()) {
expr <- dots[[.index]]
if (quo_is_call(expr, "invisible")) {
expr <- ""
} else {
expr <- quo_as_label(dots[[.index]])
}
expr <- dot_as_label(dots[[.index]])

error_context <- env(
error_name = arg_name(dots, .index),
Expand All @@ -95,6 +90,25 @@ peek_error_context <- function() {
context_peek("dplyr_error_context", "peek_error_context", "dplyr error handling")
}

dot_as_label <- function(expr) {
if (quo_is_call(expr, "invisible")) {
""
} else {
quo_as_label(expr)
}
}

mask_type <- function(mask = peek_mask()) {
if (mask$get_size() > 0) {
if (mask$is_grouped_df()) {
return("grouped")
} else if (mask$is_rowwise_df()) {
return("rowwise")
}
}
"ungrouped"
}

cnd_bullet_header <- function(what) {
error_context <- peek_error_context()
error_name <- error_context$error_name
Expand Down Expand Up @@ -164,3 +178,147 @@ skip_internal_condition <- function(cnd) {
cnd
}
}


# Warnings -------------------------------------------------------------

#' Show warnings from the last command
#'
#' Warnings that occur inside a dplyr verb like `mutate()` are caught
#' and stashed away instead of being emitted to the console. This
#' prevents rowwise and grouped data frames from flooding the console
#' with warnings. To see the original warnings, use
#' `last_dplyr_warnings()`.
#'
#' @param n Passed to [head()] so that only the first `n` warnings are
#' displayed.
#' @keywords internal
#' @export
last_dplyr_warnings <- function(n = 5) {
if (!identical(n, Inf)) {
check_number(n)
stopifnot(n >= 0)
}

warnings <- the$last_warnings
n_remaining <- max(length(warnings) - n, 0L)

warnings <- head(warnings, n = n)
warnings <- map(warnings, new_dplyr_warning)

structure(
warnings,
class = c("last_dplyr_warnings", "list"),
n_shown = n,
n_remaining = n_remaining
)
}

on_load({
the$last_warnings <- list()
the$last_cmd_frame <- ""
})

# Flushes warnings if a new top-level command is detected
push_dplyr_warnings <- function(warnings) {
last <- the$last_cmd_frame
current <- obj_address(sys.frame(1))

if (!identical(last, current)) {
reset_dplyr_warnings()
the$last_cmd_frame <- current
}

the$last_warnings <- c(the$last_warnings, warnings)
}

# Also used in tests
reset_dplyr_warnings <- function() {
the$last_warnings <- list()
}

signal_warnings <- function(warnings, error_call) {
n <- length(warnings)
if (!n) {
return()
}

push_dplyr_warnings(warnings)

first <- new_dplyr_warning(warnings[[1]])
call <- format_error_call(error_call)

msg <- paste_line(
cli::format_warning(c(
"There {cli::qty(n)} {?was/were} {n} warning{?s} in a {call} step.",
if (n > 1) "The first warning was:"
)),
paste(cli::col_yellow("!"), cnd_message(first))
)

# https://github.com/r-lib/cli/issues/525
if (n > 1) {
msg <- paste_line(
msg,
cli::format_warning(c(
i = if (n > 1) "Run {.run dplyr::last_dplyr_warnings()} to see the {n - 1} remaining warning{?s}."
))
)
}

warn(msg, use_cli_format = FALSE)
}

new_dplyr_warning <- function(data) {
label <- cur_group_label(
data$type,
data$group_data$id,
data$group_data$group
)
if (nzchar(label)) {
label <- glue(" in {label}")
}

msg <- glue::glue("Problem{label} while computing `{data$name} = {data$expr}`.")

warning_cnd(
message = msg,
parent = data$cnd,
call = data$call,
trace = data$cnd$trace
)
}

#' @export
print.last_dplyr_warnings <- function(x, ...) {
# Opt into experimental grayed out tree
local_options(
"rlang:::trace_display_tree" = TRUE
)
print(unstructure(x), ..., simplify = "none")

n_remaining <- attr(x, "n_remaining")
if (n_remaining) {
n_more <- attr(x, "n_shown") * 2
cli::cli_bullets(c(
"... with {n_remaining} more warning{?s}.",
"i" = "Run {.run dplyr::last_dplyr_warnings(n = {n_more})} to show more."
))
}
}

# rlang should export this routine
error_call <- function(call) {
tryCatch(
abort("", call = call),
error = conditionCall
)
}

cnd_message_lines <- function(cnd, ...) {
c(
"!" = cnd_header(cnd, ...),
cnd_body(cnd, ...),
cnd_footer(cnd, ...)
)
}
48 changes: 39 additions & 9 deletions R/context.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,15 +96,44 @@ group_labels_details <- function(keys) {
}), ", ")
}

cur_group_label <- function() {
mask <- peek_mask()
if(mask$is_grouped_df() && mask$get_size() > 0) {
glue("group {id}: {label}", id = cur_group_id(), label = group_labels_details(cur_group()))
} else if (mask$is_rowwise_df() && mask$get_size() > 0) {
paste0("row ", cur_group_id())
} else {
""
}
cur_group_label <- function(type = mask_type(),
id = cur_group_id(),
group = cur_group()) {
switch(
type,
ungrouped = "",
grouped = glue("group {id}: {label}", label = group_labels_details(group)),
rowwise = glue("row {id}"),
stop_mask_type(type)
)
}

cur_group_data <- function(mask_type) {
switch(
mask_type,
ungrouped = list(),
grouped = list(id = cur_group_id(), group = cur_group()),
rowwise = list(id = cur_group_id()),
stop_mask_type(mask_type)
)
}

stop_mask_type <- function(type) {
cli::cli_abort(
"Unexpected mask type {.val {type}}.",
.internal = TRUE
)
}

cnd_data <- function(cnd, ctxt, mask_type, call) {
list(
cnd = cnd,
name = ctxt$error_name,
expr = ctxt$error_expression,
type = mask_type,
group_data = cur_group_data(mask_type),
call = call
)
}

#' @rdname context
Expand Down Expand Up @@ -132,6 +161,7 @@ context_local <- function(name, value, frame = caller_env()) {
old <- context_poke(name, value)
expr <- expr(on.exit(context_poke(!!name, !!old), add = TRUE))
eval_bare(expr, frame)
value
}

peek_column <- function(call = caller_env()) {
Expand Down
3 changes: 3 additions & 0 deletions R/dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,6 @@
# reexport the deprecated `tibble::data_frame()` function

on_load(local_use_cli())

# Singletons
the <- new_environment()
43 changes: 26 additions & 17 deletions R/mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -224,19 +224,30 @@ mutate_cols <- function(.data, dots, error_call = caller_env()) {

error_call <- dplyr_error_call(error_call)

# `error_call()` does some non-trivial work, e.g. climbing frame
# environments to find generic calls. We avoid evaluating it
# repeatedly in the loop by assigning it here (lazily as we only
# need it for the error path).
delayedAssign("error_call_forced", error_call(error_call))

mask <- DataMask$new(.data, "mutate", error_call = error_call)
mask_type <- mask_type(mask)
old_current_column <- context_peek_bare("column")
error_ctxt <- NULL

on.exit(context_poke("column", old_current_column), add = TRUE)
on.exit(mask$forget(), add = TRUE)

new_columns <- set_names(list(), character())
warnings <- list()

withCallingHandlers(
for (i in seq_along(dots)) {
for (i in seq_along(dots)) local({
error_ctxt <<- local_error_context(dots = dots, .index = i, mask = mask)
context_poke("column", old_current_column)
new_columns <- mutate_col(dots[[i]], .data, mask, new_columns)
},

new_columns <<- mutate_col(dots[[i]], .data, mask, new_columns)
}),
error = function(e) {
local_error_context(dots = dots, .index = i, mask = mask)

Expand All @@ -254,26 +265,24 @@ mutate_cols <- function(.data, dots, error_call = caller_env()) {
)
},
warning = function(w) {
# Check if there is an upstack calling handler that would muffle
# the warning. This avoids doing the expensive work below for a
# silenced warning (#5675).
if (check_muffled_warning(w)) {
maybe_restart("muffleWarning")
# Don't entrace more than 5 warnings because this is very costly
if (is_null(w$trace) && length(warnings) < 5) {
w$trace <- trace_back(bottom = error_call)
}
new <- cnd_data(
cnd = w,
error_ctxt,
mask_type = mask_type,
call = error_call_forced
)

local_error_context(dots = dots, .index = i, mask = mask)

warn(c(
cnd_bullet_header("computing"),
i = cnd_header(w),
i = cnd_bullet_cur_group_label(what = "warning")
))

# Cancel `w`
warnings <<- c(warnings, list(new))
maybe_restart("muffleWarning")
}
)

signal_warnings(warnings, error_call)

is_zap <- map_lgl(new_columns, inherits, "rlang_zap")
new_columns[is_zap] <- rep(list(NULL), sum(is_zap))

Expand Down
20 changes: 20 additions & 0 deletions man/last_dplyr_warnings.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 4abc428

Please sign in to comment.