Merge pull request #151 from NSAPH-Software/release_ver0.2.5

Release ver0.2.5
NSAPH-Software · Dec 6, 2023 · 90edb4c · 90edb4c
2 parents 850e667 + f5213d1
commit 90edb4c
Show file tree

Hide file tree

Showing 149 changed files with 3,612 additions and 1,723 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -1,6 +1,7 @@
 ^CRE\.Rproj$
 ^\.Rproj\.user$
 ^LICENSE\.md$
+^LICENSE$
 ^\.github$
 _archive
 ^_pkgdown\.yml$
@@ -19,3 +20,4 @@ index.md
 ^CRAN-SUBMISSION$
 ^paper/*$
 ^CODE_OF_CONDUCT\.md$
+
diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml
@@ -0,0 +1,23 @@
+on: [push]
+
+jobs:
+  paper:
+    runs-on: ubuntu-latest
+    name: Paper Draft
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Build draft PDF
+        uses: openjournals/openjournals-draft-action@master
+        with:
+          journal: joss
+          # This should be the path to the paper within your repo.
+          paper-path: paper/paper.md
+      - name: Upload
+        uses: actions/upload-artifact@v1
+        with:
+          name: paper
+          # This is the output path where Pandoc will write the compiled
+          # PDF. Note, this should be the same directory as the input
+          # paper.md
+          path: paper/paper.pdf
diff --git a/.gitignore b/.gitignore
@@ -15,3 +15,4 @@ src-i386
 *.log
 .Rdata
 .httr-oauth
+functional_tests/results/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: CRE
 Title: Interpretable Discovery and Inference of Heterogeneous Treatment Effects
-Version: 0.2.4
+Version: 0.2.5
 Authors@R: c(
     person("Naeem", "Khoshnevis", , "[email protected]", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0003-4315-1426", AFFILIATION = "FASRC")),

diff --git a/LICENSE b/LICENSE
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
 S3method(plot,cre)
+S3method(predict,cre)
 S3method(print,cre)
 S3method(summary,cre)
 export(cre)

diff --git a/NEWS.md b/NEWS.md
@@ -1,13 +1,48 @@
+# CRE 0.2.5 (2023-12-6)
+
+## Added
+* Add (vanilla) Stability Selection (without Error Control).
+* `max_rules` hyper-parameter for max rules filtering.
+* Uncertainty Quantification in estimation by bootstrapping.
+* `B` hyper-parameter.
+* `subsample` hyper-parameter.
+* `rules` (implicit form) in cre() function return.
+* predict() function for ITE estimation via CRE.
+
+## Changed 
+* Type `stability_selection` binary -> string ('no','vanilla','error_control').
+* Unify `ntrees_rf` hyper-parameter and `ntrees_gbm` hyper-parameter in
+`ntrees` hyper-parameter.
+* In rules generation retrieve decision rules also from internal nodes, and not
+just from terminal nodes.
+* `ite_method_dis`, `ite_method_inf` method-parameter -> `ite_method`.
+* `ps_method_dis`, `ps_method_inf` method-parameter -> `learner_ps`.
+* `oreg_method_dis`, `oreg_method_inf` method-parameter -> `learner_y`.
+
+## Removed
+* `max_nodes` hyper-parameter.
+* Remove rules generation by Generalized Boosted Regression.
+* `replace` hyper-parameter.
+* `penalty_rl` hyper-parameter.
+* `t_pvalue` hyper-parameter.
+* `ite_pred` from cre() function return.
+
+## Bug fixes
+* Error saving covariate names in CRE result when using `intervention_vars`.
+
+
 # CRE 0.2.4 (2023-6-14)
 
 ## Changed 
 *  Method paper description is updated.
 
+
 # CRE 0.2.3 (2023-4-27)
 
 ## Removed 
 * Bayesian Causal Forest (`bcf`) ITE estimator is not supported.
 
+
 # CRE 0.2.2 (2023-4-17)
 
 ## Changed 

diff --git a/R/CRE_package.R b/R/CRE_package.R
@@ -1,23 +1,23 @@
 #' @title
-#' The 'CRE' package
+#' The CRE package
 #'
 #' @description
-#' In health and social sciences, it is critically important to 
-#' identify subgroups of the study population where a treatment 
-#' has notable heterogeneity in the causal effects with respect 
-#' to the average treatment effect. Data-driven discovery of 
-#' heterogeneous treatment effects (HTE) via decision tree methods 
-#' has been proposed for this task. Despite its high interpretability, 
-#' the single-tree discovery of HTE tends to be highly unstable and to 
-#' find an oversimplified representation of treatment heterogeneity. 
-#' To accommodate these shortcomings, we propose Causal Rule Ensemble 
-#' (CRE), a new method to discover heterogeneous subgroups through an 
-#' ensemble-of-trees approach. CRE has the following features: 
-#' 1) provides an interpretable representation of the HTE; 2) allows 
-#' extensive exploration of complex heterogeneity patterns; and 3) 
-#' guarantees high stability in the discovery. The discovered subgroups 
-#' are defined in terms of interpretable decision rules, and we develop 
-#' a general two-stage approach for subgroup-specific conditional 
+#' In health and social sciences, it is critically important to
+#' identify subgroups of the study population where a treatment
+#' has notable heterogeneity in the causal effects with respect
+#' to the average treatment effect. Data-driven discovery of
+#' heterogeneous treatment effects (HTE) via decision tree methods
+#' has been proposed for this task. Despite its high interpretability,
+#' the single-tree discovery of HTE tends to be highly unstable and to
+#' find an oversimplified representation of treatment heterogeneity.
+#' To accommodate these shortcomings, we propose Causal Rule Ensemble
+#' (CRE), a new method to discover heterogeneous subgroups through an
+#' ensemble-of-trees approach. CRE has the following features:
+#' 1) provides an interpretable representation of the HTE; 2) allows
+#' extensive exploration of complex heterogeneity patterns; and 3)
+#' guarantees high stability in the discovery. The discovered subgroups
+#' are defined in terms of interpretable decision rules, and we develop
+#' a general two-stage approach for subgroup-specific conditional
 #' causal effects estimation, providing theoretical guarantees.
 #'
 #' @docType package
@@ -39,8 +39,8 @@
 #' @importFrom methods as
 #'
 #' @references
-#' Bargagli-Stoffi, F. J., Cadei, R., Lee, K. and Dominici, F. (2023). 
-#' Causal rule ensemble: Interpretable Discovery and Inference of 
-#' Heterogeneous Treatment Effects,arXiv preprint arXiv:2009.09036 
+#' Bargagli-Stoffi, F. J., Cadei, R., Lee, K. and Dominici, F. (2023).
+#' Causal rule ensemble: Interpretable Discovery and Inference of
+#' Heterogeneous Treatment Effects, arXiv preprint arXiv:2009.09036
 #'
 NULL
diff --git a/R/check_hyper_params.R b/R/check_hyper_params.R
@@ -2,7 +2,7 @@
 #' Check input parameters
 #'
 #' @description
-#' Checks consistency in input (hyper) parameters for the cre function.
+#' Checks consistency in input (hyper) parameters for the `cre` function.
 #'
 #' @param X_names The observed covariates names.
 #' @param params The list of parameters required to run the function.
@@ -18,30 +18,18 @@ check_hyper_params <- function(X_names, params) {
   logger::log_debug("Checking hyper parameters...")
 
   # Input params checks --------------------------------------------------------
-  ntrees_rf <- getElement(params, "ntrees_rf")
-  if (length(ntrees_rf) == 0) {
-    ntrees_rf <- 20
+  ntrees <- getElement(params, "ntrees")
+  if (length(ntrees) == 0) {
+    ntrees <- 20
   } else {
-    if (!inherits(ntrees_rf, "numeric")) {
-      stop("Invalid 'ntrees_rf' input. Please input a number.")
+    if (!inherits(ntrees, "numeric")) {
+      stop("Invalid 'ntrees' input. Please input a positive integer")
     }
-  }
-  params[["ntrees_rf"]] <- ntrees_rf
-
-  ntrees_gbm <- getElement(params, "ntrees_gbm")
-  if (length(ntrees_gbm) == 0) {
-    ntrees_gbm <- 20
-  } else {
-    if (!inherits(ntrees_gbm, "numeric")) {
-      stop("Invalid 'ntrees_gbm' input. Please input a number.")
+    if (ntrees<1) {
+      stop("Invalid 'ntrees' input. Please input a positive integer")
     }
   }
-  params[["ntrees_gbm"]] <- ntrees_gbm
-
-  if (params[["ntrees_gbm"]] + params[["ntrees_rf"]] == 0) {
-    stop("The total number of trees (ntrees_rf + ntrees_gbm) has to be
-         greater than 0")
-  }
+  params[["ntrees"]] <- ntrees
 
   node_size <- getElement(params, "node_size")
   if (length(node_size) == 0) {
@@ -53,15 +41,15 @@ check_hyper_params <- function(X_names, params) {
   }
   params[["node_size"]] <- node_size
 
-  max_nodes <- getElement(params, "max_nodes")
-  if (length(max_nodes) == 0) {
-    max_nodes <- 5
+  max_rules <- getElement(params, "max_rules")
+  if (length(max_rules) == 0) {
+    max_rules <- 50
   } else {
-    if (!inherits(max_nodes, "numeric")) {
-      stop("Invalid 'max_nodes' input. Please input a number.")
+    if (!inherits(max_rules, "numeric")) {
+      stop("Invalid 'max_rules' input. Please input a number.")
     }
   }
-  params[["max_nodes"]] <- max_nodes
+  params[["max_rules"]] <- max_rules
 
   max_depth <- getElement(params, "max_depth")
   if (length(max_depth) == 0) {
@@ -119,58 +107,36 @@ check_hyper_params <- function(X_names, params) {
   }
   params[["t_corr"]] <- t_corr
 
-  t_pvalue <- getElement(params, "t_pvalue")
-  if (length(t_pvalue) == 0) {
-    t_pvalue <- 0.05
+  stability_selection <- getElement(params, "stability_selection")
+  if (length(stability_selection) == 0) {
+    stability_selection <- "vanilla"
   } else {
-    if (!inherits(t_pvalue, "numeric")) {
-      stop("Invalid 't_pvalue' input. Please input a number.")
+    if (!(stability_selection %in% c("error_control", "no","vanilla"))) {
+      stop(paste0("Invalid `stability_selection` argument. Please input ",
+                  "a value among: {`no`, `vanilla`, `error_control`}."))
     }
   }
-  params[["t_pvalue"]] <- t_pvalue
+  params[["stability_selection"]] <- stability_selection
 
-  stability_selection <- getElement(params, "stability_selection")
-  pfer <- getElement(params, "pfer")
   cutoff <- getElement(params, "cutoff")
-  if (length(stability_selection) == 0) {
-    stability_selection <- TRUE
-    pfer <- 1
+  if (length(cutoff) == 0) {
     cutoff <- 0.9
   } else {
-    if (!(stability_selection %in% c(TRUE, FALSE))) {
-      stop(paste0("Please specify 'TRUE' or 'FALSE' for",
-                  " the stability_selection argument."))
-    } else if (stability_selection) {
-      if (length(pfer) == 0) {
-        pfer <-  1
-      } else {
-        if (!inherits(pfer, "numeric")) {
-          stop("Invalid 'pfer' input. Please input a number.")
-        }
-      }
-      if (length(cutoff) == 0) {
-        cutoff <-  0.9
-      } else {
-        if (!inherits(cutoff, "numeric")) {
-          stop("Invalid 'cutoff' input. Please input a number.")
-        }
-      }
+    if (!inherits(cutoff, "numeric")) {
+      stop("Invalid 'cutoff' input. Please input a number.")
     }
   }
-  params[["stability_selection"]] <- stability_selection
-  params[["pfer"]] <- pfer
   params[["cutoff"]] <- cutoff
 
-
-  penalty_rl <- getElement(params, "penalty_rl")
-  if (length(penalty_rl) == 0) {
-    penalty_rl <- 1
+  pfer <- getElement(params, "pfer")
+  if (length(pfer) == 0) {
+    pfer <- 1
   } else {
-    if (!inherits(penalty_rl, "numeric")) {
-      stop("Invalid 'penalty_rl' input. Please input a number.")
+    if (!inherits(pfer, "numeric")) {
+      stop("Invalid 'pfer' input. Please input a number.")
     }
   }
-  params[["penalty_rl"]] <- penalty_rl
+  params[["pfer"]] <- pfer
 
   intervention_vars <- getElement(params, "intervention_vars")
   if (length(intervention_vars) == 0) {
@@ -196,6 +162,28 @@ check_hyper_params <- function(X_names, params) {
   }
   params[["offset"]] <- offset
 
+  # Check for correct B input
+  B <- getElement(params, "B")
+  if (length(B) == 0) {
+    B <- 20
+  } else {
+    if (!inherits(B, "numeric")) {
+      stop("Invalid 'B' input. Please input an integer.")
+    }
+  }
+  params[["B"]] <- B
+
+  # Check for correct subsample input
+  subsample <- getElement(params, "subsample")
+  if (length(subsample) == 0) {
+    subsample <- 0.5
+  } else {
+    if (!inherits(subsample, "numeric") || (subsample < 0) || (subsample > 1)) {
+      stop("Invalid 'subsample' input. Please input a number between 0 and 1.")
+    }
+  }
+  params[["subsample"]] <- subsample
+
   logger::log_debug("Done with checking hyper parameters.")
 
   return(params)

diff --git a/R/check_input_data.R b/R/check_input_data.R
@@ -2,7 +2,7 @@
 #' Check input data
 #'
 #' @description
-#' Conducts sanity checks for the input data
+#' Conducts sanity checks for the input data.
 #'
 #' @param y The observed response vector.
 #' @param z The treatment vector.
-Original file line number
+Diff line change
@@ Expand Up / @@ -15,3 +15,4 @@ src-i386 @@
     *.log
     .Rdata
     .httr-oauth
+    functional_tests/results/