Skip to content

Commit

Permalink
Merge branch 'master' into missing_values
Browse files Browse the repository at this point in the history
  • Loading branch information
mnwright committed May 16, 2024
2 parents 29f3ca0 + 5df7565 commit 191e298
Show file tree
Hide file tree
Showing 40 changed files with 930 additions and 176 deletions.
8 changes: 8 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
version: 2

updates:
# Keep dependencies for GitHub Actions up-to-date
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "monthly"
2 changes: 1 addition & 1 deletion .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
R_KEEP_PKG_SOURCE: yes

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-pandoc@v2

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/cpp-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
linux:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Build
run: |
sudo apt-get install cmake
Expand All @@ -21,7 +21,7 @@ jobs:
macos:
runs-on: macos-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Build
run: |
mkdir build && pushd build
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
permissions:
contents: write
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-pandoc@v2

Expand All @@ -41,7 +41,7 @@ jobs:

- name: Deploy to GitHub pages 🚀
if: github.event_name != 'pull_request'
uses: JamesIves/github-pages-deploy-action@v4.4.1
uses: JamesIves/github-pages-deploy-action@v4.6.0
with:
clean: false
branch: gh-pages
Expand Down
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: ranger
Type: Package
Title: A Fast Implementation of Random Forests
Version: 0.16.0
Date: 2023-11-09
Version: 0.16.1
Date: 2024-05-16
Author: Marvin N. Wright [aut, cre], Stefan Wager [ctb], Philipp Probst [ctb]
Maintainer: Marvin N. Wright <[email protected]>
Description: A fast implementation of Random Forests, particularly suited for high
Expand All @@ -19,7 +19,7 @@ Suggests:
survival,
testthat
Encoding: UTF-8
RoxygenNote: 7.2.3
URL: http://imbs-hl.github.io/ranger/,
RoxygenNote: 7.3.1
URL: https://imbs-hl.github.io/ranger/,
https://github.com/imbs-hl/ranger
BugReports: https://github.com/imbs-hl/ranger/issues
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export(csrf)
export(deforest)
export(getTerminalNodeIDs)
export(holdoutRF)
export(hshrink)
export(importance)
export(importance_pvalues)
export(predictions)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@

# ranger 0.16.1
* Set num.threads=2 as default; respect environment variables and options
* Add hierarchical shrinkage
* Allow vector min.node.size and min.bucket for class-specific limits

# ranger 0.16.0
* New CRAN version

Expand Down
12 changes: 12 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,15 @@ randomObsNode <- function(groups, y, inbag_counts) {
.Call(`_ranger_randomObsNode`, groups, y, inbag_counts)
}

# Thin R wrapper around the compiled routine `_ranger_hshrink_regr`.
# All arguments are forwarded unchanged through .Call() and the result is
# returned invisibly. hshrink() calls this once per tree of a regression
# forest, passing 0 for nodeID, parent_n, parent_pred and cum_sum.
# NOTE(review): this looks like Rcpp-generated glue (R/RcppExports.R); if so,
# change the C++ source and regenerate instead of editing this by hand.
hshrink_regr <- function(left_children, right_children, num_samples_nodes, node_predictions, split_values, lambda, nodeID, parent_n, parent_pred, cum_sum) {
invisible(.Call(`_ranger_hshrink_regr`, left_children, right_children, num_samples_nodes, node_predictions, split_values, lambda, nodeID, parent_n, parent_pred, cum_sum))
}

# Thin R wrapper around the compiled routine `_ranger_hshrink_prob`.
# All arguments are forwarded unchanged through .Call() and the result is
# returned invisibly. hshrink() calls this once per tree of a probability
# forest, passing a temporary class frequency matrix as class_freq, 0 for
# nodeID and parent_n, and zero vectors (one entry per class value) for
# parent_pred and cum_sum.
# NOTE(review): this looks like Rcpp-generated glue (R/RcppExports.R); if so,
# change the C++ source and regenerate instead of editing this by hand.
hshrink_prob <- function(left_children, right_children, num_samples_nodes, class_freq, lambda, nodeID, parent_n, parent_pred, cum_sum) {
invisible(.Call(`_ranger_hshrink_prob`, left_children, right_children, num_samples_nodes, class_freq, lambda, nodeID, parent_n, parent_pred, cum_sum))
}

# Thin R wrapper around the compiled routine `_ranger_replace_class_counts`.
# Both arguments are forwarded unchanged through .Call() and the result is
# returned invisibly. hshrink() uses it to write the values of its temporary
# class frequency matrix (class_counts_new) back into a tree's
# terminal.class.counts (class_counts_old).
# NOTE(review): presumably class_counts_old is modified in place by the
# compiled code, since the return value is discarded by the caller - confirm
# against the C++ implementation.
replace_class_counts <- function(class_counts_old, class_counts_new) {
invisible(.Call(`_ranger_replace_class_counts`, class_counts_old, class_counts_new))
}

90 changes: 90 additions & 0 deletions R/hshrink.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# -------------------------------------------------------------------------------
# This file is part of Ranger.
#
# Ranger is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ranger is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ranger. If not, see <http://www.gnu.org/licenses/>.
#
# Written by:
#
# Marvin N. Wright
# Institut fuer Medizinische Biometrie und Statistik
# Universitaet zu Luebeck
# Ratzeburger Allee 160
# 23562 Luebeck
# Germany
#
# http://www.imbs-luebeck.de
# -------------------------------------------------------------------------------


#' Hierarchical shrinkage
#'
#' Apply hierarchical shrinkage to a ranger object.
#' Hierarchical shrinkage is a regularization technique that recursively shrinks node predictions towards parent node predictions.
#' For details see Agarwal et al. (2022).
#'
#' Only regression and probability forests are supported. For classification
#' forests, grow the forest with \code{probability = TRUE}; survival forests
#' are not yet implemented.
#'
#' @param rf ranger object, created with \code{node.stats = TRUE}.
#' @param lambda Non-negative shrinkage parameter (a single number).
#'
#' @return The ranger object is modified in-place. The function is called for
#'   this side effect; its value is returned invisibly.
#'
#' @examples
#' ## Hierarchical shrinkage for a probability forest
#' rf <- ranger(Species ~ ., iris, node.stats = TRUE, probability = TRUE)
#' hshrink(rf, lambda = 5)
#' @references
#' \itemize{
#'   \item Agarwal, A., Tan, Y.S., Ronen, O., Singh, C. & Yu, B. (2022). Hierarchical Shrinkage: Improving the accuracy and interpretability of tree-based models. Proceedings of the 39th International Conference on Machine Learning, PMLR 162:111-135.
#' }
#' @author Marvin N. Wright
#' @export
hshrink <- function(rf, lambda) {
  if (is.null(rf$forest$num.samples.nodes)) {
    stop("Hierarchical shrinkage needs node statistics, set node.stats=TRUE in ranger() call.")
  }
  # Check type, length and NA before comparing: `lambda < 0` alone fails with
  # an unrelated R error for NA, non-numeric or vector input.
  if (!is.numeric(lambda) || length(lambda) != 1 || is.na(lambda) || lambda < 0) {
    stop("Shrinkage parameter lambda has to be non-negative.")
  }

  # seq_len() instead of 1:n, which would iterate over c(1, 0) for zero trees.
  tree_ids <- seq_len(rf$num.trees)

  if (rf$treetype == "Regression") {
    invisible(lapply(tree_ids, function(treeID) {
      # Start the recursion at node 0 with zeroed parent statistics.
      hshrink_regr(
        rf$forest$child.nodeIDs[[treeID]][[1]], rf$forest$child.nodeIDs[[treeID]][[2]],
        rf$forest$num.samples.nodes[[treeID]], rf$forest$node.predictions[[treeID]],
        rf$forest$split.values[[treeID]], lambda, 0, 0, 0, 0
      )
    }))
  } else if (rf$treetype == "Probability estimation") {
    invisible(lapply(tree_ids, function(treeID) {
      # Create temporary class frequency matrix
      class_freq <- t(simplify2array(rf$forest$terminal.class.counts[[treeID]]))

      parent_pred <- rep(0, length(rf$forest$class.values))
      cum_sum <- rep(0, length(rf$forest$class.values))
      hshrink_prob(
        rf$forest$child.nodeIDs[[treeID]][[1]], rf$forest$child.nodeIDs[[treeID]][[2]],
        rf$forest$num.samples.nodes[[treeID]], class_freq,
        lambda, 0, 0, parent_pred, cum_sum
      )

      # Assign temporary matrix values back to ranger object
      replace_class_counts(rf$forest$terminal.class.counts[[treeID]], class_freq)
    }))
  } else if (rf$treetype == "Classification") {
    stop("To apply hierarchical shrinkage to classification forests, use probability=TRUE in the ranger() call.")
  } else if (rf$treetype == "Survival") {
    stop("Hierarchical shrinkage not yet implemented for survival.")
  } else {
    stop("Unknown treetype.")
  }

}


22 changes: 22 additions & 0 deletions R/onAttach.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

# Package attach hook. In interactive sessions, emit a startup message that
# states the package version and where the thread count comes from. The
# sources are checked in this order: environment variable
# R_RANGER_NUM_THREADS, options(ranger.num.threads), options(Ncpus), and
# finally the built-in default of 2 threads.
.onAttach <- function(libname, pkgname) {
  # Stay silent in non-interactive sessions (scripts, batch jobs)
  if (!interactive()) {
    return()
  }

  env_value <- Sys.getenv("R_RANGER_NUM_THREADS")
  ranger_option <- getOption("ranger.num.threads")
  ncpus_option <- getOption("Ncpus")

  # First configured source wins
  thread_string <- if (env_value != "") {
    paste(env_value, "threads as set by environment variable R_RANGER_NUM_THREADS. Can be overwritten with num.threads.")
  } else if (!is.null(ranger_option)) {
    paste(ranger_option, "threads as set by options(ranger.num.threads = N). Can be overwritten with num.threads.")
  } else if (!is.null(ncpus_option)) {
    paste(ncpus_option, "threads as set by options(Ncpus = N). Can be overwritten with num.threads.")
  } else {
    "2 threads (default). Change with num.threads in ranger() and predict(), options(Ncpus = N), options(ranger.num.threads = N) or environment variable R_RANGER_NUM_THREADS."
  }

  startup_text <- paste("ranger", packageVersion("ranger"), "using", thread_string)
  packageStartupMessage(startup_text)
}
12 changes: 9 additions & 3 deletions R/predict.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@
##'
##' For classification and \code{predict.all = TRUE}, the factor levels are returned as numerics.
##' To retrieve the corresponding factor levels, use \code{rf$forest$levels}, if \code{rf} is the ranger object.
##'
##' By default, ranger uses 2 threads. The default can be changed with: (1) \code{num.threads} in ranger/predict call, (2) environment variable
##' R_RANGER_NUM_THREADS, (3) \code{options(ranger.num.threads = N)}, (4) \code{options(Ncpus = N)}, with precedence in that order.
##'
##' @title Ranger prediction
##' @param object Ranger \code{ranger.forest} object.
Expand All @@ -45,7 +48,7 @@
##' @param type Type of prediction. One of 'response', 'se', 'terminalNodes', 'quantiles' with default 'response'. See below for details.
##' @param se.method Method to compute standard errors. One of 'jack', 'infjack' with default 'infjack'. Only applicable if type = 'se'. See below for details.
##' @param seed Random seed. Default is \code{NULL}, which generates the seed from \code{R}. Set to \code{0} to ignore the \code{R} seed. The seed is used in case of ties in classification mode.
##' @param num.threads Number of threads. Default is number of CPUs available.
##' @param num.threads Number of threads. Use 0 for all available cores. Default is 2 if not set by options/environment variables (see below).
##' @param verbose Verbose output on or off.
##' @param inbag.counts Number of times the observations are in-bag in the trees.
##' @param ... further arguments passed to or from other methods.
Expand Down Expand Up @@ -186,7 +189,7 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE,
## Num threads
## NULL -> default from R_RANGER_NUM_THREADS, options(ranger.num.threads), options(Ncpus), else 2; 0 -> detect from system in C++.
if (is.null(num.threads)) {
num.threads = 0
num.threads <- as.integer(Sys.getenv("R_RANGER_NUM_THREADS", getOption("ranger.num.threads", getOption("Ncpus", 2L))))
} else if (!is.numeric(num.threads) || num.threads < 0) {
stop("Error: Invalid value for num.threads")
}
Expand Down Expand Up @@ -426,6 +429,9 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE,
##'
##' For classification and \code{predict.all = TRUE}, the factor levels are returned as numerics.
##' To retrieve the corresponding factor levels, use \code{rf$forest$levels}, if \code{rf} is the ranger object.
##'
##' By default, ranger uses 2 threads. The default can be changed with: (1) \code{num.threads} in ranger/predict call, (2) environment variable
##' R_RANGER_NUM_THREADS, (3) \code{options(ranger.num.threads = N)}, (4) \code{options(Ncpus = N)}, with precedence in that order.
##'
##' @title Ranger prediction
##' @param object Ranger \code{ranger} object.
Expand All @@ -437,7 +443,7 @@ predict.ranger.forest <- function(object, data, predict.all = FALSE,
##' @param quantiles Vector of quantiles for quantile prediction. Set \code{type = 'quantiles'} to use.
##' @param what User specified function for quantile prediction used instead of \code{quantile}. Must return numeric vector, see examples.
##' @param seed Random seed. Default is \code{NULL}, which generates the seed from \code{R}. Set to \code{0} to ignore the \code{R} seed. The seed is used in case of ties in classification mode.
##' @param num.threads Number of threads. Default is number of CPUs available.
##' @param num.threads Number of threads. Use 0 for all available cores. Default is 2 if not set by options/environment variables (see below).
##' @param verbose Verbose output on or off.
##' @param ... further arguments passed to or from other methods.
##' @return Object of class \code{ranger.prediction} with elements
Expand Down
Loading

0 comments on commit 191e298

Please sign in to comment.