Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Collect warnings and add dplyr_last_warnings() #6443

Merged
merged 23 commits into from
Sep 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ BugReports: https://github.com/tidyverse/dplyr/issues
Depends:
R (>= 3.4.0)
Imports:
cli (>= 3.4.0),
generics,
glue (>= 1.3.2),
lifecycle (>= 1.0.2),
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ S3method(print,any_vars)
S3method(print,dplyr_join_by)
S3method(print,dplyr_sel_vars)
S3method(print,fun_list)
S3method(print,last_dplyr_warnings)
S3method(print,src)
S3method(pull,data.frame)
S3method(rbind,grouped_df)
Expand Down Expand Up @@ -334,6 +335,7 @@ export(join_by)
export(lag)
export(last)
export(last_col)
export(last_dplyr_warnings)
export(lead)
export(left_join)
export(location)
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# dplyr (development version)

* Warnings emitted inside `mutate()` and variants are now collected and stashed
away. Run the new `last_dplyr_warnings()` function to see the warnings emitted
within dplyr verbs during the last top-level command.

This fixes performance issues when thousands of warnings are emitted with
rowwise and grouped data frames (#6005, #6236).

* `group_by_prepare()` loses the `caller_env` argument. It was rarely used
and it is no longer needed (#6444).

Expand Down
170 changes: 164 additions & 6 deletions R/conditions.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,7 @@ quo_as_label <- function(quo) {
}

local_error_context <- function(dots, .index, mask, frame = caller_env()) {
expr <- dots[[.index]]
if (quo_is_call(expr, "invisible")) {
expr <- ""
} else {
expr <- quo_as_label(dots[[.index]])
}
expr <- dot_as_label(dots[[.index]])

error_context <- env(
error_name = arg_name(dots, .index),
Expand All @@ -95,6 +90,25 @@ peek_error_context <- function() {
context_peek("dplyr_error_context", "peek_error_context", "dplyr error handling")
}

dot_as_label <- function(expr) {
if (quo_is_call(expr, "invisible")) {
""
} else {
quo_as_label(expr)
}
}

mask_type <- function(mask = peek_mask()) {
if (mask$get_size() > 0) {
if (mask$is_grouped_df()) {
return("grouped")
} else if (mask$is_rowwise_df()) {
return("rowwise")
}
}
"ungrouped"
}

cnd_bullet_header <- function(what) {
error_context <- peek_error_context()
error_name <- error_context$error_name
Expand Down Expand Up @@ -164,3 +178,147 @@ skip_internal_condition <- function(cnd) {
cnd
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Neat that this stashes all 4 warnings, but it is a little hard to know which sets of warnings came from which expressions in fn(). Not sure if there is anything to do, just wanted to mention it.

library(dplyr)

fn <- function() {
  throws_warning <- mtcars %>%
    group_by(vs) %>%
    mutate(mpg = sqrt(-mpg))
  
  throws_warning2 <- mtcars %>%
    group_by(vs) %>%
    mutate(mpg = sqrt(-mpg))
}

fn()

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah I don't know how to do better here. If you source the file you'll get srcrefs buried in the backtraces of dplyr_last_warnings().

Each mutate() sys-call should have its own srcref that we could show in the summary warnings, I'll look into making advantage of that. I'm not sure why this isn't the case already actually.

}
}


# Warnings -------------------------------------------------------------

#' Show warnings from the last command
#'
#' Warnings that occur inside a dplyr verb like `mutate()` are caught
#' and stashed away instead of being emitted to the console. This
#' prevents rowwise and grouped data frames from flooding the console
#' with warnings. To see the original warnings, use
#' `last_dplyr_warnings()`.
#'
#' @param n Passed to [head()] so that only the first `n` warnings are
#' displayed.
#' @keywords internal
#' @export
last_dplyr_warnings <- function(n = 5) {
if (!identical(n, Inf)) {
check_number(n)
stopifnot(n >= 0)
}

warnings <- the$last_warnings
n_remaining <- max(length(warnings) - n, 0L)

warnings <- head(warnings, n = n)
warnings <- map(warnings, new_dplyr_warning)

structure(
warnings,
class = c("last_dplyr_warnings", "list"),
n_shown = n,
n_remaining = n_remaining
)
}

on_load({
the$last_warnings <- list()
the$last_cmd_frame <- ""
})

# Flushes warnings if a new top-level command is detected
push_dplyr_warnings <- function(warnings) {
last <- the$last_cmd_frame
current <- obj_address(sys.frame(1))

if (!identical(last, current)) {
reset_dplyr_warnings()
the$last_cmd_frame <- current
}

the$last_warnings <- c(the$last_warnings, warnings)
}

# Also used in tests
reset_dplyr_warnings <- function() {
the$last_warnings <- list()
}

signal_warnings <- function(warnings, error_call) {
n <- length(warnings)
if (!n) {
return()
}

push_dplyr_warnings(warnings)

first <- new_dplyr_warning(warnings[[1]])
call <- format_error_call(error_call)

msg <- paste_line(
cli::format_warning(c(
"There {cli::qty(n)} {?was/were} {n} warning{?s} in a {call} step.",
if (n > 1) "The first warning was:"
)),
paste(cli::col_yellow("!"), cnd_message(first))
)

# https://github.com/r-lib/cli/issues/525
if (n > 1) {
msg <- paste_line(
msg,
cli::format_warning(c(
i = if (n > 1) "Run {.run dplyr::last_dplyr_warnings()} to see the {n - 1} remaining warning{?s}."
))
)
}

warn(msg, use_cli_format = FALSE)
}

new_dplyr_warning <- function(data) {
label <- cur_group_label(
data$type,
data$group_data$id,
data$group_data$group
)
if (nzchar(label)) {
label <- glue(" in {label}")
}

msg <- glue::glue("Problem{label} while computing `{data$name} = {data$expr}`.")

warning_cnd(
message = msg,
parent = data$cnd,
call = data$call,
trace = data$cnd$trace
)
}

#' @export
print.last_dplyr_warnings <- function(x, ...) {
# Opt into experimental grayed out tree
local_options(
"rlang:::trace_display_tree" = TRUE
)
print(unstructure(x), ..., simplify = "none")

n_remaining <- attr(x, "n_remaining")
if (n_remaining) {
n_more <- attr(x, "n_shown") * 2
cli::cli_bullets(c(
"... with {n_remaining} more warning{?s}.",
"i" = "Run {.run dplyr::last_dplyr_warnings(n = {n_more})} to show more."
))
}
}

# rlang should export this routine
error_call <- function(call) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a hack to get the result of rlang:::error_call(), which has special behaviour compared to frame_call(), for instance with S3 methods.

Tracked in r-lib/rlang#1474.

tryCatch(
abort("", call = call),
error = conditionCall
)
}

cnd_message_lines <- function(cnd, ...) {
c(
"!" = cnd_header(cnd, ...),
cnd_body(cnd, ...),
cnd_footer(cnd, ...)
)
}
48 changes: 39 additions & 9 deletions R/context.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,15 +96,44 @@ group_labels_details <- function(keys) {
}), ", ")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably backtick the {name} = {value} result in group_labels_details() here so it shows up a little better in the error message. Fine if you don't want to do that here

}

cur_group_label <- function() {
mask <- peek_mask()
if(mask$is_grouped_df() && mask$get_size() > 0) {
glue("group {id}: {label}", id = cur_group_id(), label = group_labels_details(cur_group()))
} else if (mask$is_rowwise_df() && mask$get_size() > 0) {
paste0("row ", cur_group_id())
} else {
""
}
cur_group_label <- function(type = mask_type(),
id = cur_group_id(),
group = cur_group()) {
switch(
type,
ungrouped = "",
grouped = glue("group {id}: {label}", label = group_labels_details(group)),
rowwise = glue("row {id}"),
stop_mask_type(type)
)
}

cur_group_data <- function(mask_type) {
switch(
mask_type,
ungrouped = list(),
grouped = list(id = cur_group_id(), group = cur_group()),
rowwise = list(id = cur_group_id()),
stop_mask_type(mask_type)
)
}

stop_mask_type <- function(type) {
cli::cli_abort(
"Unexpected mask type {.val {type}}.",
.internal = TRUE
)
}

cnd_data <- function(cnd, ctxt, mask_type, call) {
list(
cnd = cnd,
name = ctxt$error_name,
expr = ctxt$error_expression,
type = mask_type,
group_data = cur_group_data(mask_type),
call = call
)
}

#' @rdname context
Expand Down Expand Up @@ -132,6 +161,7 @@ context_local <- function(name, value, frame = caller_env()) {
old <- context_poke(name, value)
expr <- expr(on.exit(context_poke(!!name, !!old), add = TRUE))
eval_bare(expr, frame)
value
}

peek_column <- function(call = caller_env()) {
Expand Down
3 changes: 3 additions & 0 deletions R/dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,6 @@
# reexport the deprecated `tibble::data_frame()` function

on_load(local_use_cli())

# Singletons
the <- new_environment()
43 changes: 26 additions & 17 deletions R/mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -224,19 +224,30 @@ mutate_cols <- function(.data, dots, error_call = caller_env()) {

error_call <- dplyr_error_call(error_call)

# `error_call()` does some non-trivial work, e.g. climbing frame
# environments to find generic calls. We avoid evaluating it
# repeatedly in the loop by assigning it here (lazily as we only
# need it for the error path).
delayedAssign("error_call_forced", error_call(error_call))

mask <- DataMask$new(.data, "mutate", error_call = error_call)
mask_type <- mask_type(mask)
old_current_column <- context_peek_bare("column")
error_ctxt <- NULL

on.exit(context_poke("column", old_current_column), add = TRUE)
on.exit(mask$forget(), add = TRUE)

new_columns <- set_names(list(), character())
warnings <- list()

withCallingHandlers(
for (i in seq_along(dots)) {
for (i in seq_along(dots)) local({
error_ctxt <<- local_error_context(dots = dots, .index = i, mask = mask)
context_poke("column", old_current_column)
new_columns <- mutate_col(dots[[i]], .data, mask, new_columns)
},

new_columns <<- mutate_col(dots[[i]], .data, mask, new_columns)
}),
error = function(e) {
local_error_context(dots = dots, .index = i, mask = mask)

Expand All @@ -254,26 +265,24 @@ mutate_cols <- function(.data, dots, error_call = caller_env()) {
)
},
warning = function(w) {
# Check if there is an upstack calling handler that would muffle
# the warning. This avoids doing the expensive work below for a
# silenced warning (#5675).
if (check_muffled_warning(w)) {
maybe_restart("muffleWarning")
# Don't entrace more than 5 warnings because this is very costly
if (is_null(w$trace) && length(warnings) < 5) {
w$trace <- trace_back(bottom = error_call)
}
new <- cnd_data(
cnd = w,
error_ctxt,
mask_type = mask_type,
call = error_call_forced
)

local_error_context(dots = dots, .index = i, mask = mask)

warn(c(
cnd_bullet_header("computing"),
i = cnd_header(w),
i = cnd_bullet_cur_group_label(what = "warning")
))

# Cancel `w`
warnings <<- c(warnings, list(new))
maybe_restart("muffleWarning")
}
)

signal_warnings(warnings, error_call)

is_zap <- map_lgl(new_columns, inherits, "rlang_zap")
new_columns[is_zap] <- rep(list(NULL), sum(is_zap))

Expand Down
20 changes: 20 additions & 0 deletions man/last_dplyr_warnings.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading