Skip to content

Commit

Permalink
Deal with the global variable note via utils::globalVariables()
Browse files Browse the repository at this point in the history
  • Loading branch information
mayer79 committed Mar 25, 2024
1 parent c92335e commit e9bf7bd
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 16 deletions.
6 changes: 0 additions & 6 deletions R/measure_importance.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ measure_min_depth <- function(min_depth_frame, mean_sample){

# Calculate the number of nodes split on each variable for a data frame with the whole forest
measure_no_of_nodes <- function(forest_table){
`split var` <- NULL
frame <- dplyr::group_by(forest_table, variable = variable) %>%
dplyr::summarize(no_of_nodes = dplyr::n())
frame <- as.data.frame(frame[!is.na(frame$variable),])
Expand Down Expand Up @@ -60,7 +59,6 @@ measure_vimp_ranger <- function(forest){

# Calculate the number of trees using each variable for splitting
measure_no_of_trees <- function(min_depth_frame){
variable <- NULL
frame <- dplyr::group_by(min_depth_frame, variable) %>%
dplyr::summarize(no_of_trees = n()) %>%
as.data.frame()
Expand All @@ -69,7 +67,6 @@ measure_no_of_trees <- function(min_depth_frame){

# Calculate the number of times each variable is split on the root node
measure_times_a_root <- function(min_depth_frame){
variable <- NULL
frame <- min_depth_frame[min_depth_frame$minimal_depth == 0, ] %>%
dplyr::group_by(variable) %>%
dplyr::summarize(times_a_root = n()) %>%
Expand Down Expand Up @@ -113,7 +110,6 @@ measure_importance <- function(forest, mean_sample = "top_trees", measures = NUL
#' @importFrom data.table rbindlist
#' @export
measure_importance.randomForest <- function(forest, mean_sample = "top_trees", measures = NULL){
tree <- NULL; `split var` <- NULL; depth <- NULL
if(is.null(measures)){
if(forest$type %in% c("classification", "unsupervised")){
measures <- c("mean_min_depth", "no_of_nodes", "accuracy_decrease",
Expand Down Expand Up @@ -177,7 +173,6 @@ measure_importance.randomForest <- function(forest, mean_sample = "top_trees", m
#' @importFrom data.table rbindlist
#' @export
measure_importance.ranger <- function(forest, mean_sample = "top_trees", measures = NULL){
tree <- NULL; splitvarName <- NULL; depth <- NULL
if(is.null(measures)){
measures <- c("mean_min_depth", "no_of_nodes", forest$importance.mode, "no_of_trees", "times_a_root", "p_value")
}
Expand Down Expand Up @@ -296,7 +291,6 @@ plot_multi_way_importance <- function(importance_frame, x_measure = "mean_min_de
y_measure = "times_a_root", size_measure = NULL,
min_no_of_trees = 0, no_of_labels = 10,
main = "Multi-way importance plot"){
variable <- NULL
if(any(c("randomForest", "ranger") %in% class(importance_frame))){
importance_frame <- measure_importance(importance_frame)
}
Expand Down
4 changes: 0 additions & 4 deletions R/min_depth_distribution.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#' @import dplyr
#' @importFrom data.table rbindlist
min_depth_distribution <- function(forest){
tree <- NULL; `split var` <- NULL; depth <- NULL
forest_table <- forest2df(forest)
min_depth_frame <- dplyr::group_by(forest_table, tree, variable) %>%
dplyr::summarize(minimal_depth = min(depth), .groups = "drop")
Expand All @@ -24,7 +23,6 @@ min_depth_distribution <- function(forest){

# Count the trees in which each variable had a given minimal depth
min_depth_count <- function(min_depth_frame){
tree <- NULL; minimal_depth <- NULL; variable <- NULL
mean_tree_depth <- dplyr::group_by(min_depth_frame, tree) %>%
dplyr::summarize(depth = max(minimal_depth) + 1) %>%
as.data.frame()
Expand All @@ -45,7 +43,6 @@ min_depth_count <- function(min_depth_frame){

# Get a data frame with means of minimal depth calculated using sample = c("all_trees", "top_trees", "relevant_trees")
get_min_depth_means <- function(min_depth_frame, min_depth_count_list, mean_sample){
.SD <- NULL; variable <- NULL
if(mean_sample == "all_trees"){
min_depth_count_list[[1]][is.na(min_depth_count_list[[1]]$minimal_depth), "minimal_depth"] <- min_depth_count_list[[3]]
min_depth_means <-
Expand Down Expand Up @@ -89,7 +86,6 @@ get_min_depth_means <- function(min_depth_frame, min_depth_count_list, mean_samp
plot_min_depth_distribution <- function(min_depth_frame, k = 10, min_no_of_trees = 0,
mean_sample = "top_trees", mean_scale = FALSE, mean_round = 2,
main = "Distribution of minimal depth and its mean"){
minimal_depth <- NULL; mean_minimal_depth_label <- NULL; mean_minimal_depth <- NULL
if(any(c("randomForest", "ranger") %in% class(min_depth_frame))){
min_depth_frame <- min_depth_distribution(min_depth_frame)
}
Expand Down
4 changes: 0 additions & 4 deletions R/min_depth_interactions.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Calculate conditional depth in a tree with respect to all variables from vector vars
conditional_depth <- function(frame, vars){
`.SD` <- NULL; depth <- NULL; splitvarName <- NULL
index <- data.table::as.data.table(frame)[
!is.na(variable), .SD[which.min(depth), "number"], by = variable
]
Expand All @@ -27,7 +26,6 @@ conditional_depth <- function(frame, vars){

# Get a data frame with values of minimal depth conditional on selected variables for the whole forest
min_depth_interactions_values <- function(forest, vars){
`.` <- NULL; .SD <- NULL; tree <- NULL; `split var` <- NULL
interactions_frame <- as.data.frame(forest2df(forest))
interactions_frame[vars] <- NA_real_
interactions_frame <-
Expand Down Expand Up @@ -71,7 +69,6 @@ min_depth_interactions_values <- function(forest, vars){
#' @export
min_depth_interactions <- function(forest, vars = important_variables(measure_importance(forest)),
mean_sample = "top_trees", uncond_mean_sample = mean_sample){
variable <- NULL; `.` <- NULL; tree <- NULL; `split var` <- NULL; depth <- NULL
ntree <- ntrees(forest)
min_depth_interactions_frame <- min_depth_interactions_values(forest, vars)
mean_tree_depth <- min_depth_interactions_frame[[2]]
Expand Down Expand Up @@ -145,7 +142,6 @@ min_depth_interactions <- function(forest, vars = important_variables(measure_im
plot_min_depth_interactions <- function(interactions_frame, k = 30,
main = paste0("Mean minimal depth for ",
paste0(k, " most frequent interactions"))){
mean_min_depth <- NULL; occurrences <- NULL; uncond_mean_min_depth <- NULL
if(any(c("randomForest", "ranger") %in% class(interactions_frame))){
interactions_frame <- min_depth_interactions(interactions_frame)
}
Expand Down
23 changes: 21 additions & 2 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ max_na <- function(x) {
max(x, na.rm = TRUE)
}

utils::globalVariables(c("prediction", "variable"))

# Depth of each node of a single tree.
# The input is a matrix with left and right child nodes in 1:nrow(childs).
tree_depth <- function(childs) {
Expand Down Expand Up @@ -75,6 +73,27 @@ ntrees <- function(x) {
}

# Applies tree2df() to each tree and stacks the results
#' @importFrom data.table rbindlist
forest2df <- function(x) {
  # One data frame per tree, indexed 1..ntrees(x); seq_len() keeps the
  # zero-tree case from iterating.
  tree_ids <- seq_len(ntrees(x))
  per_tree <- lapply(tree_ids, function(i) tree2df(x, i))
  # Stack the per-tree frames into a single table.
  rbindlist(per_tree)
}

# Deal with the global variable note.
# The names registered below are used as bare symbols inside dplyr and
# data.table expressions elsewhere in the package (e.g. grouping by `tree` and
# `variable`, or `.SD[which.min(depth), ...]`), so code analysis cannot find a
# visible binding for them. Declaring them once here takes the place of
# per-function dummy assignments such as `variable <- NULL`.
# Entries are kept in alphabetical order.
utils::globalVariables(
c(
# NOTE(review): presumably the data.table / magrittr placeholder - confirm
".",
# data.table's per-group subset symbol
".SD",
"depth",
"mean_min_depth",
"mean_minimal_depth",
"mean_minimal_depth_label",
"minimal_depth",
"occurrences",
"prediction",
# Non-syntactic column name (contains a space); written as `split var` in code
"split var",
"splitvarName",
"tree",
"uncond_mean_min_depth",
"variable"
)
)

0 comments on commit e9bf7bd

Please sign in to comment.