From 61eb4b8610deba2f3662f273bc6f7a70e3e7651c Mon Sep 17 00:00:00 2001
From: Philipp Bach
Date: Tue, 20 Jul 2021 14:35:01 +0200
Subject: [PATCH 01/16] add dgp for high-dimensional instruments as in Belloni
 et al. (2012)

---
 NAMESPACE                 |   1 +
 R/datasets.R              | 134 ++++++++++++++++++++++++++++++++++++++
 man/make_pliv_BCCH2012.Rd |  93 ++++++++++++++++++++++++++
 3 files changed, 228 insertions(+)
 create mode 100644 man/make_pliv_BCCH2012.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 49283191..8d463b36 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -11,6 +11,7 @@ export(fetch_401k)
 export(fetch_bonus)
 export(make_iivm_data)
 export(make_irm_data)
+export(make_pliv_BCCH2012)
 export(make_pliv_CHS2015)
 export(make_plr_CCDDHNR2018)
 export(make_plr_turrell2018)
diff --git a/R/datasets.R b/R/datasets.R
index 70877205..b75f0612 100644
--- a/R/datasets.R
+++ b/R/datasets.R
@@ -552,6 +552,140 @@ make_pliv_CHS2015 = function(n_obs, alpha = 1, dim_x = 200, dim_z = 150,
   return(data)
 }

+#' @title Generates data from a partially linear IV regression model used in
+#' Belloni et al (2012).
+#'
+#' @description
+#' Generates data from a linear IV regression model used in
+#' Belloni et al. (2012). The data generating process
+#' is defined as
+#'
+#' \eqn{y_i = \beta d_i + e_i,}
+#'
+#' \eqn{d_i = z_i'\Pi + v_i,}
+#'
+#' with i.i.d.
+#'
+#' \eqn{(e_i, v_i) \sim \mathcal{N} \left(0, \left( \begin{array}{cc}
+#' \sigma^2_e & \sigma_{ev} \\ \sigma_{ev} & \sigma^2_v\end{array}
+#' \right) \right),}
+#'
+#' with \eqn{\beta} being the parameter of interest and
+#' \eqn{\Pi = \left(\pi^0, \pi_0^1, \pi_0^2 \ldots, \pi_0^{p_z - 1} \right)},
+#' instrumental variables \eqn{z_i = (z_{i1}, \ldots, z_{ip_z})} drawn from a
+#' normal distribution \eqn{N(0,\Sigma)} with covariance matrix \eqn{\Sigma_Z} and
+#' \eqn{E[z^2_{ih}]=\sigma^2_z} and \eqn{Corr(z_{ih}, z_{ij})=\rho^{j-h}}.
+#' The sparsity parameter `s` can be used to set coefficients in \eqn{\Pi}
+#' with \eqn{j>s} exactly to zero, i.e.,
+#' \eqn{\Pi = \left(\pi^0, \pi_0^1, \pi_0^2 \ldots, \pi_0^{s}, 0,
+#' \ldots , 0 \right)}.
+#'
+#' Default values are set to \eqn{\rho = 0.5}, \eqn{\sigma^2_e = 1},
+#' \eqn{\sigma^2_z = 1} and \eqn{Corr(e,v) = 0.6}. For the coefficient vectors
+#' defaults are set such that \eqn{\beta = 1} and \eqn{\pi_0 = 0.7}.
+#'
+#' @references Belloni, A., Chen, D., Chernozhukov, V., and Hansen, C. (2012),
+#' Sparse Models and Methods for Optimal Instruments with an Application to
+#' Eminent Domain. Econometrica, 80 (6): 2369-2429.
+#'
+#' @param n_obs (`integer(1)`) \cr
+#' The number of observations to simulate.
+#'
+#' @param beta (`numeric(1)`) \cr
+#' The value of the causal parameter.
+#'
+#' @param dim_z (`integer(1)`) \cr
+#' The number of instruments.
+#'
+#' @param pi_0 (`numeric(1)`) \cr
+#' Base coefficient determining the first-stage coefficient vector.
+#'
+#' @param s (`integer(1)`) \cr
+#' Sparsity index.
+#'
+#' @param rho (`numeric(1)`) \cr
+#' Coefficient determining correlation between instruments.
+#'
+#' @param sigma_z (`numeric(1)`) \cr
+#' Standard deviation of instruments.
+#'
+#' @param corr (`numeric(1)`) \cr
+#' Correlation between errors \eqn{e} and \eqn{v}.
+#'
+#' @param sigma_e (`numeric(1)`) \cr
+#' Standard deviation for error \eqn{e}.
+#'
+#' @param return_type (`character(1)`) \cr
+#' If `"DoubleMLData"`, returns a `DoubleMLData` object.
+#' If `"data.frame"` returns a `data.frame()`.
+#' If `"data.table"` returns a `data.table()`.
+#' If `"matrix"` a named `list()` with entries `y`, `d` and
+#' `z` is returned.
+#' Every entry in the list is a `matrix()` object. Default is `"DoubleMLData"`.
+#'
+#' @return A data object according to the choice of `return_type`.
+#'
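+#' @examples
+#' # A minimal usage sketch (not a formal test): draws a small dataset with
+#' # the defaults documented above, using reduced sizes to keep it fast.
+#' # Internally, rmvnorm() from the mvtnorm package is assumed to be available.
+#' set.seed(1234)
+#' dt = make_pliv_BCCH2012(n_obs = 50, dim_z = 10, return_type = "data.table")
+#' dim(dt) # 50 rows; columns y, d and Z1-Z10
+#'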
+#' @export
+make_pliv_BCCH2012 = function(n_obs = 100, beta = 1, dim_z = 100, pi_0 = 0.7,
+                              s = 0,
+                              rho = 0.5, sigma_z = 1,
+                              corr = 0.6, sigma_e = 1,
+                              return_type = "DoubleMLData") {
+  assert_count(n_obs)
+  assert_numeric(beta, len = 1)
+  assert_count(dim_z)
+  assert_numeric(pi_0, len = 1)
+  assert_count(s, positive = FALSE)
+  assert_numeric(rho, len = 1)
+  assert_numeric(sigma_z, len = 1)
+  assert_numeric(corr, len = 1)
+  assert_numeric(sigma_e, len = 1)
+  assert_choice(
+    return_type,
+    c("data.table", "matrix", "data.frame", "DoubleMLData"))
+
+  sigma_e_v = matrix(c(sigma_e^2, corr, corr, 1), ncol = 2)
+  mu_e_v = rep(0, 2)
+  e_v = rmvnorm(n = n_obs, mean = mu_e_v, sigma = sigma_e_v)
+  e = e_v[, 1]
+  v = e_v[, 2]
+
+  sigma_z = toeplitz(rho^(0:(dim_z - 1)))
+  mu_z = rep(0, dim_z)
+  z = rmvnorm(n = n_obs, mean = mu_z, sigma = sigma_z)
+
+  pi = pi_0^(0:(dim_z-1))
+  if (s > 0) {
+    pi[(s+1):dim_z] = 0
+  }
+  d = z %*% pi + v
+  y = beta * d + e
+
+  if (return_type == "matrix") {
+    return(list("y" = y, "d" = d, "z" = z))
+  } else {
+    colnames(z) = paste0("Z", 1:dim_z)
+    colnames(y) = "y"
+    colnames(d) = "d"
+
+    if (return_type == "data.frame") {
+      data = data.frame(y, d, z)
+      return(data)
+    } else if (return_type == "data.table") {
+      data = data.table(y, d, z)
+      return(data)
+    } else if (return_type == "DoubleMLData") {
+      dt = data.table(y, d, z)
+      data = DoubleMLData$new(dt,
+        y_col = "y", d_cols = "d",
+        x_cols = NULL,
+        z_cols = colnames(z))
+      return(data)
+    }
+  }
+  return(data)
+}
+
 #' @title Generates data from an interactive regression (IRM) model.
 #'
diff --git a/man/make_pliv_BCCH2012.Rd b/man/make_pliv_BCCH2012.Rd
new file mode 100644
index 00000000..06f5a105
--- /dev/null
+++ b/man/make_pliv_BCCH2012.Rd
@@ -0,0 +1,93 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/datasets.R
+\name{make_pliv_BCCH2012}
+\alias{make_pliv_BCCH2012}
+\title{Generates data from a partially linear IV regression model used in
+Belloni et al (2012).}
+\usage{
+make_pliv_BCCH2012(
+  n_obs = 100,
+  beta = 1,
+  dim_z = 100,
+  pi_0 = 0.7,
+  s = 0,
+  rho = 0.5,
+  sigma_z = 1,
+  corr = 0.6,
+  sigma_e = 1,
+  return_type = "DoubleMLData"
+)
+}
+\arguments{
+\item{n_obs}{(\code{integer(1)}) \cr
+The number of observations to simulate.}
+
+\item{beta}{(\code{numeric(1)}) \cr
+The value of the causal parameter.}
+
+\item{dim_z}{(\code{integer(1)}) \cr
+The number of instruments.}
+
+\item{pi_0}{(\code{numeric(1)}) \cr
+Base coefficient determining the first-stage coefficient vector.}
+
+\item{s}{(\code{integer(1)}) \cr
+Sparsity index.}
+
+\item{rho}{(\code{numeric(1)}) \cr
+Coefficient determining correlation between instruments.}
+
+\item{sigma_z}{(\code{numeric(1)}) \cr
+Standard deviation of instruments.}
+
+\item{corr}{(\code{numeric(1)}) \cr
+Correlation between errors \eqn{e} and \eqn{v}.}
+
+\item{sigma_e}{(\code{numeric(1)}) \cr
+Standard deviation for error \eqn{e}.}
+
+\item{return_type}{(\code{character(1)}) \cr
+If \code{"DoubleMLData"}, returns a \code{DoubleMLData} object.
+If \code{"data.frame"} returns a \code{data.frame()}.
+If \code{"data.table"} returns a \code{data.table()}.
+If \code{"matrix"} a named \code{list()} with entries \code{y}, \code{d} and
+\code{z} is returned.
+Every entry in the list is a \code{matrix()} object.
+Default is \code{"DoubleMLData"}.}
+}
+\value{
+A data object according to the choice of \code{return_type}.
+}
+\description{
+Generates data from a linear IV regression model used in
+Belloni et al. (2012). The data generating process
+is defined as
+
+\eqn{y_i = \beta d_i + e_i,}
+
+\eqn{d_i = z_i'\Pi + v_i,}
+
+with i.i.d.
+
+\eqn{(e_i, v_i) \sim \mathcal{N} \left(0, \left( \begin{array}{cc}
+\sigma^2_e & \sigma_{ev} \\ \sigma_{ev} & \sigma^2_v\end{array}
+\right) \right),}
+
+with \eqn{\beta} being the parameter of interest and
+\eqn{\Pi = \left(\pi^0, \pi_0^1, \pi_0^2 \ldots, \pi_0^{p_z - 1} \right)},
+instrumental variables \eqn{z_i = (z_{i1}, \ldots, z_{ip_z})} drawn from a
+normal distribution \eqn{N(0,\Sigma)} with covariance matrix \eqn{\Sigma_Z} and
+\eqn{E[z^2_{ih}]=\sigma^2_z} and \eqn{Corr(z_{ih}, z_{ij})=\rho^{j-h}}.
+The sparsity parameter \code{s} can be used to set coefficients in \eqn{\Pi}
+with \eqn{j>s} exactly to zero, i.e.,
+\eqn{\Pi = \left(\pi^0, \pi_0^1, \pi_0^2 \ldots, \pi_0^{s}, 0,
+\ldots , 0 \right)}.
+
+Default values are set to \eqn{\rho = 0.5}, \eqn{\sigma^2_e = 1},
+\eqn{\sigma^2_z = 1} and \eqn{Corr(e,v) = 0.6}. For the coefficient vectors
+defaults are set such that \eqn{\beta = 1} and \eqn{\pi_0 = 0.7}.
+}
+\references{
+Belloni, A., Chen, D., Chernozhukov, V., and Hansen, C. (2012),
+Sparse Models and Methods for Optimal Instruments with an Application to
+Eminent Domain. Econometrica, 80 (6): 2369-2429.
+}

From 225591d5e45d90b16d5c810ac8784489fba4b15e Mon Sep 17 00:00:00 2001
From: Philipp Bach
Date: Tue, 20 Jul 2021 14:41:32 +0200
Subject: [PATCH 02/16] add BCCH2012 to references index

---
 _pkgdown.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/_pkgdown.yml b/_pkgdown.yml
index ba8a788a..8b864bc9 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -61,6 +61,7 @@ reference:
   - title: Datasets generators
     contents:
       - make_plr_CCDDHNR2018
+      - make_pliv_BCCH2012
       - make_pliv_CHS2015
      - make_irm_data
      - make_iivm_data

From afc3f66b871515ecd4f684bbda8be54daff55118 Mon Sep 17 00:00:00 2001
From: Philipp Bach
Date: Tue, 20 Jul 2021 19:15:06 +0200
Subject: [PATCH 03/16] use partialled-out versions of D and Y (based on OLS)
 in the score for DoubleMLPLIVpartialZ

---
 R/double_ml_pliv.R | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/R/double_ml_pliv.R b/R/double_ml_pliv.R
index 503bec88..31d31183 100644
--- a/R/double_ml_pliv.R
+++ b/R/double_ml_pliv.R
@@ -456,11 +456,38 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV",

     d = self$data$data_model[[self$data$treat_col]]
     y = self$data$data_model[[self$data$y_col]]
-
+
+    if ( ! (is.null(self$data$x_cols) | length(self$data$x_cols) == 0)) {
+      # Partial out Xs from y and d based on linear regression
+      task_part_y = initiate_task("lm_part_out_y", self$data$data_model,
+        target = self$data$y_col,
+        select_cols = c(self$data$x_cols,
+          self$data$other_treat_cols),
+        "LearnerRegr")
+      learner_lm = LearnerRegrLM$new()
+      resampling_part_y = rsmp("insample")$instantiate(task_part_y)
+      r_part_y = resample(task_part_y, learner_lm, resampling_part_y,
+        store_models = TRUE)
+      u_hat = as.data.table(r_part_y$prediction())$response
+
+      task_part_d = initiate_task("lm_part_out_d", self$data$data_model,
+        target = self$data$treat_col,
+        select_cols = c(self$data$x_cols,
+          self$data$other_treat_cols),
+        "LearnerRegr")
+      resampling_part_d = rsmp("insample")$instantiate(task_part_d)
+      r_part_d = resample(task_part_d, learner_lm, resampling_part_d,
+        store_models = TRUE)
+      w_hat = as.data.table(r_part_d$prediction())$response
+    } else {
+      w_hat = d
+      u_hat = y
+    }
+
     if (is.character(self$score)) {
       if (self$score == "partialling out") {
-        psi_a = -r_hat * d
-        psi_b = r_hat * y
+        psi_a = -r_hat * w_hat
+        psi_b = r_hat * u_hat
       }
       res = list(psi_a = psi_a, psi_b = psi_b)
     } else if (is.function(self$score)) {
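Since the score is linear in the target parameter, psi = psi_a * theta + psi_b
(see the class documentation further below), the quantities computed in this
commit enter the final estimate as a simple ratio of sample moments. A compact
sketch of the solving step, with r_hat, u_hat and w_hat as computed in the
method above (dml2-style pooling over all observations):

    # solve mean(psi_a) * theta + mean(psi_b) = 0 for theta
    psi_a = -r_hat * w_hat
    psi_b = r_hat * u_hat
    theta_hat = -mean(psi_b) / mean(psi_a)
    # equivalently: mean(r_hat * u_hat) / mean(r_hat * w_hat)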
From dd365ce84e6ed624e3a75a1ea1fbbfeba13033df Mon Sep 17 00:00:00 2001
From: Philipp Bach
Date: Wed, 21 Jul 2021 10:46:15 +0200
Subject: [PATCH 04/16] fix DGP BCCH2012

---
 R/datasets.R              | 48 +++++++++++++++++++++++++--------------
 man/make_pliv_BCCH2012.Rd | 24 +++++++++++++-------
 2 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/R/datasets.R b/R/datasets.R
index b75f0612..cb596a47 100644
--- a/R/datasets.R
+++ b/R/datasets.R
@@ -553,7 +553,7 @@ make_pliv_CHS2015 = function(n_obs, alpha = 1, dim_x = 200, dim_z = 150,
 }

 #' @title Generates data from a partially linear IV regression model used in
-#' Belloni et al (2012).
+#' Belloni et al. (2012).
 #'
 #' @description
 #' Generates data from a linear IV regression model used in
@@ -571,18 +571,22 @@ make_pliv_CHS2015 = function(n_obs, alpha = 1, dim_x = 200, dim_z = 150,
 #' \right) \right),}
 #'
 #' with \eqn{\beta} being the parameter of interest and
-#' \eqn{\Pi = \left(\pi^0, \pi_0^1, \pi_0^2 \ldots, \pi_0^{p_z - 1} \right)},
-#' instrumental variables \eqn{z_i = (z_{i1}, \ldots, z_{ip_z})} drawn from a
-#' normal distribution \eqn{N(0,\Sigma)} with covariance matrix \eqn{\Sigma_Z} and
-#' \eqn{E[z^2_{ih}]=\sigma^2_z} and \eqn{Corr(z_{ih}, z_{ij})=\rho^{j-h}}.
+#' \eqn{\Pi = C \cdot \left(\pi_0^0, \pi_0^1, \pi_0^2 \ldots, \pi_0^{p_z - 1}
+#' \right)}, instrumental variables \eqn{z_i = (z_{i1}, \ldots, z_{ip_z})} drawn
+#' from a normal distribution \eqn{N(0,\Sigma)} with covariance matrix
+#' \eqn{\Sigma_Z} and \eqn{E[z^2_{ih}]=\sigma^2_z} and \eqn{Corr(z_{ih},
+#' z_{ij})=\rho^{j-h}}.
 #' The sparsity parameter `s` can be used to set coefficients in \eqn{\Pi}
 #' with \eqn{j>s} exactly to zero, i.e.,
-#' \eqn{\Pi = \left(\pi^0, \pi_0^1, \pi_0^2 \ldots, \pi_0^{s}, 0,
-#' \ldots , 0 \right)}.
+#' \eqn{\Pi = C \cdot \left(\pi_0^0, \pi_0^1, \pi_0^2 \ldots, \pi_0^{s}, 0,
+#' \ldots , 0 \right)}. The constant \eqn{C} is calibrated internally such that
+#' the concentration parameter \eqn{\mu^2} is set to a specific value specified
+#' via `mu2`.
 #'
 #' Default values are set to \eqn{\rho = 0.5}, \eqn{\sigma^2_e = 1},
 #' \eqn{\sigma^2_z = 1} and \eqn{Corr(e,v) = 0.6}. For the coefficient vectors
-#' defaults are set such that \eqn{\beta = 1} and \eqn{\pi_0 = 0.7}.
+#' defaults are set such that \eqn{\beta = 1}, \eqn{\mu^2 = 30} and
+#' \eqn{\pi_0 = 0.7}.
 #'
 #' @references Belloni, A., Chen, D., Chernozhukov, V., and Hansen, C. (2012),
 #' Sparse Models and Methods for Optimal Instruments with an Application to
@@ -603,6 +607,9 @@ make_pliv_CHS2015 = function(n_obs, alpha = 1, dim_x = 200, dim_z = 150,
 #' @param s (`integer(1)`) \cr
 #' Sparsity index.
 #'
+#' @param mu2 (`numeric(1)`) \cr
+#' Value of concentration parameter used for calibration of constant \eqn{C}.
+#'
 #' @param rho (`numeric(1)`) \cr
 #' Coefficient determining correlation between instruments.
 #'
@@ -627,15 +634,18 @@ make_pliv_CHS2015 = function(n_obs, alpha = 1, dim_x = 200, dim_z = 150,
 #'
 #' @export
 make_pliv_BCCH2012 = function(n_obs = 100, beta = 1, dim_z = 100, pi_0 = 0.7,
-                              s = 0,
+                              s = 0, mu2 = 30,
                               rho = 0.5, sigma_z = 1,
                               corr = 0.6, sigma_e = 1,
                               return_type = "DoubleMLData") {
+  # based on https://www.econometricsociety.org/content/supplement-sparse-models-and-methods-optimal-instruments-application-eminent-domain-1 and
+  # http://qed.econ.queensu.ca/jae/datasets/spindler001/
   assert_count(n_obs)
   assert_numeric(beta, len = 1)
   assert_count(dim_z)
   assert_numeric(pi_0, len = 1)
   assert_count(s, positive = FALSE)
+  assert_numeric(mu2, len = 1)
   assert_numeric(rho, len = 1)
   assert_numeric(sigma_z, len = 1)
   assert_numeric(corr, len = 1)
@@ -644,21 +654,25 @@ make_pliv_BCCH2012 = function(n_obs = 100, beta = 1, dim_z = 100, pi_0 = 0.7,
     return_type,
     c("data.table", "matrix", "data.frame", "DoubleMLData"))

-  sigma_e_v = matrix(c(sigma_e^2, corr, corr, 1), ncol = 2)
-  mu_e_v = rep(0, 2)
-  e_v = rmvnorm(n = n_obs, mean = mu_e_v, sigma = sigma_e_v)
-  e = e_v[, 1]
-  v = e_v[, 2]
-
   sigma_z = toeplitz(rho^(0:(dim_z - 1)))
   mu_z = rep(0, dim_z)
   z = rmvnorm(n = n_obs, mean = mu_z, sigma = sigma_z)
-
   pi = pi_0^(0:(dim_z-1))
+
+  scale = sqrt(mu2/((n_obs + mu2)*pi %*% sigma_z %*% pi))
+  sigma_v = sqrt(1 - (scale^2)*t(pi) %*% sigma_z %*% pi)
+  sev = corr * sigma_e * sigma_v
+
+  sigma_e_v = matrix(c(sigma_e^2, sev, sev, sigma_v^2), ncol = 2)
+  mu_e_v = rep(0, 2)
+  e_v = rmvnorm(n = n_obs, mean = mu_e_v, sigma = sigma_e_v)
+  e = e_v[, 1]
+  v = e_v[, 2]
+
   if (s > 0) {
     pi[(s+1):dim_z] = 0
   }
-  d = z %*% pi + v
+  d = scale * z %*% pi + v
   y = beta * d + e

   if (return_type == "matrix") {
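A quick way to see that this calibration hits the target: writing
q = pi' Sigma_z pi, the code sets scale^2 = mu2 / ((n_obs + mu2) * q) and
sigma_v^2 = 1 - scale^2 * q, so the implied concentration parameter (in the
convention of Belloni et al. (2012), mu^2 = n_obs * Pi' Sigma_z Pi / sigma_v^2
with Pi = scale * pi) reduces to mu2 exactly. A standalone numeric check of
this identity, with small illustrative values:

    n_obs = 100; mu2 = 30; rho = 0.5; pi_0 = 0.7; dim_z = 5
    sigma_z = toeplitz(rho^(0:(dim_z - 1)))
    pi = pi_0^(0:(dim_z - 1))
    q = drop(t(pi) %*% sigma_z %*% pi)
    scale2 = mu2 / ((n_obs + mu2) * q)
    sigma_v2 = 1 - scale2 * q
    n_obs * scale2 * q / sigma_v2 # = 30, i.e., mu2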
diff --git a/man/make_pliv_BCCH2012.Rd b/man/make_pliv_BCCH2012.Rd
index 06f5a105..a3ec3b6e 100644
--- a/man/make_pliv_BCCH2012.Rd
+++ b/man/make_pliv_BCCH2012.Rd
@@ -3,7 +3,7 @@
 \name{make_pliv_BCCH2012}
 \alias{make_pliv_BCCH2012}
 \title{Generates data from a partially linear IV regression model used in
-Belloni et al (2012).}
+Belloni et al. (2012).}
 \usage{
 make_pliv_BCCH2012(
   n_obs = 100,
   beta = 1,
   dim_z = 100,
   pi_0 = 0.7,
   s = 0,
+  mu2 = 30,
   rho = 0.5,
   sigma_z = 1,
   corr = 0.6,
   sigma_e = 1,
   return_type = "DoubleMLData"
 )
 }
 \arguments{
 \item{n_obs}{(\code{integer(1)}) \cr
 The number of observations to simulate.}

 \item{beta}{(\code{numeric(1)}) \cr
 The value of the causal parameter.}

 \item{dim_z}{(\code{integer(1)}) \cr
 The number of instruments.}

 \item{pi_0}{(\code{numeric(1)}) \cr
 Base coefficient determining the first-stage coefficient vector.}

 \item{s}{(\code{integer(1)}) \cr
 Sparsity index.}

+\item{mu2}{(\code{numeric(1)}) \cr
+Value of concentration parameter used for calibration of constant \eqn{C}.}
+
 \item{rho}{(\code{numeric(1)}) \cr
 Coefficient determining correlation between instruments.}

 \item{sigma_z}{(\code{numeric(1)}) \cr
 Standard deviation of instruments.}

 \item{corr}{(\code{numeric(1)}) \cr
 Correlation between errors \eqn{e} and \eqn{v}.}

 \item{sigma_e}{(\code{numeric(1)}) \cr
 Standard deviation for error \eqn{e}.}

 \item{return_type}{(\code{character(1)}) \cr
 If \code{"DoubleMLData"}, returns a \code{DoubleMLData} object.
 If \code{"data.frame"} returns a \code{data.frame()}.
 If \code{"data.table"} returns a \code{data.table()}.
 If \code{"matrix"} a named \code{list()} with entries \code{y}, \code{d} and
 \code{z} is returned.
 Every entry in the list is a \code{matrix()} object.
 Default is \code{"DoubleMLData"}.}
 }
 \value{
 A data object according to the choice of \code{return_type}.
 }
 \description{
 Generates data from a linear IV regression model used in
 Belloni et al. (2012). The data generating process
 is defined as

 \eqn{y_i = \beta d_i + e_i,}

 \eqn{d_i = z_i'\Pi + v_i,}

 with i.i.d.

 \eqn{(e_i, v_i) \sim \mathcal{N} \left(0, \left( \begin{array}{cc}
 \sigma^2_e & \sigma_{ev} \\ \sigma_{ev} & \sigma^2_v\end{array}
 \right) \right),}

 with \eqn{\beta} being the parameter of interest and
-\eqn{\Pi = \left(\pi^0, \pi_0^1, \pi_0^2 \ldots, \pi_0^{p_z - 1} \right)},
-instrumental variables \eqn{z_i = (z_{i1}, \ldots, z_{ip_z})} drawn from a
-normal distribution \eqn{N(0,\Sigma)} with covariance matrix \eqn{\Sigma_Z} and
-\eqn{E[z^2_{ih}]=\sigma^2_z} and \eqn{Corr(z_{ih}, z_{ij})=\rho^{j-h}}.
+\eqn{\Pi = C \cdot \left(\pi_0^0, \pi_0^1, \pi_0^2 \ldots, \pi_0^{p_z - 1} +\right)}, instrumental variables \eqn{z_i = (z_{i1}, \ldots, z_{ip_z})} drawn +from a normal distribution \eqn{N(0,\Sigma)} with covariance matrix +\eqn{\Sigma_Z} and \eqn{E[z^2_{ih}]=\sigma^2_z} and \eqn{Corr(z_{ih}, +z_{ij})=\rho^{j-h}}. The sparsity parameter \code{s} can be used to set coefficients in \eqn{\Pi} with \eqn{j>s} exactly to zero, i.e., -\eqn{\Pi = \left(\pi^0, \pi_0^1, \pi_0^2 \ldots, \pi_0^{s}, 0, -\ldots , 0 \right)}. +\eqn{\Pi = C \cdot \left(\pi_0^0, \pi_0^1, \pi_0^2 \ldots, \pi_0^{s}, 0, +\ldots , 0 \right)}. The constant \eqn{C} is calibrated internally such that +the concentration parameter \eqn{\mu^2} is set to a specific value specified +via \code{mu2}. Default values are set to \eqn{\rho = 0.5}, \eqn{\sigma^2_e = 1}, \eqn{\sigma^2_z = 1} and \eqn{Corr(e,v) = 0.6}. For the coefficient vectors -defaults are set such that \eqn{\beta = 1} and \eqn{\pi_0 = 0.7}. +defaults are set such that \eqn{\beta = 1}, \eqn{\mu^2 = 30} and +\eqn{\pi_0 = 0.7}. } \references{ Belloni, A., Chen, D., Chernozhukov, V., and Hansen, C. (2012), From 0fdd835e5ed143712c35ba9181b2986d89665115 Mon Sep 17 00:00:00 2001 From: Philipp Bach Date: Wed, 21 Jul 2021 10:54:06 +0200 Subject: [PATCH 05/16] fix bug in BCCH2012 --- R/datasets.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/datasets.R b/R/datasets.R index cb596a47..c421b3e3 100644 --- a/R/datasets.R +++ b/R/datasets.R @@ -659,7 +659,7 @@ make_pliv_BCCH2012 = function(n_obs = 100, beta = 1, dim_z = 100, pi_0 = 0.7, z = rmvnorm(n = n_obs, mean = mu_z, sigma = sigma_z) pi = pi_0^(0:(dim_z-1)) - scale = sqrt(mu2/((n_obs + mu2)*pi %*% sigma_z %*% pi)) + scale = c(sqrt(mu2/((n_obs + mu2)*pi %*% sigma_z %*% pi))) sigma_v = sqrt(1 - (scale^2)*t(pi) %*% sigma_z %*% pi) sev = corr * sigma_e * sigma_v From 47e4d01240338e176456ac5c10c611c78ac591c7 Mon Sep 17 00:00:00 2001 From: Philipp Bach Date: Wed, 21 Jul 2021 17:44:07 +0200 Subject: [PATCH 06/16] 1st Draft for PartialZ with low-dim Xs --- R/double_ml_pliv.R | 62 ++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/R/double_ml_pliv.R b/R/double_ml_pliv.R index 31d31183..9ba8df29 100644 --- a/R/double_ml_pliv.R +++ b/R/double_ml_pliv.R @@ -251,7 +251,6 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV", return(res) }, - ml_nuisance_and_score_elements_partialX = function(smpls, ...) { g_hat = dml_cv_predict(self$learner$ml_g, @@ -439,25 +438,28 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV", ml_nuisance_and_score_elements_partialZ = function(smpls, ...) { # nuisance r - - r_hat = dml_cv_predict(self$learner$ml_r, - c( - self$data$x_cols, - self$data$other_treat_cols, - self$data$z_cols), - self$data$treat_col, - self$data$data_model, - nuisance_id = "nuis_r", - smpls = smpls, - est_params = self$get_params("ml_r"), - return_train_preds = FALSE, - learner_class = private$learner_class$ml_r, - fold_specific_params = private$fold_specific_params) - d = self$data$data_model[[self$data$treat_col]] y = self$data$data_model[[self$data$y_col]] - if ( ! 
(is.null(self$data$x_cols) | length(self$data$x_cols) == 0)) {
+      if ( (is.null(self$data$x_cols) | length(self$data$x_cols) == 0)) {
+        r_hat = dml_cv_predict(self$learner$ml_r,
+          c(
+            self$data$x_cols,
+            self$data$other_treat_cols,
+            self$data$z_cols),
+          self$data$treat_col,
+          self$data$data_model,
+          nuisance_id = "nuis_r",
+          smpls = smpls,
+          est_params = self$get_params("ml_r"),
+          return_train_preds = FALSE,
+          learner_class = private$learner_class$ml_r,
+          fold_specific_params =
+            private$fold_specific_params)
+      w_hat = d
+      u_hat = y
+    }
+    else {
       # Partial out Xs from y and d based on linear regression
       task_part_y = initiate_task("lm_part_out_y", self$data$data_model,
         target = self$data$y_col,
         select_cols = c(self$data$x_cols,
           self$data$other_treat_cols),
         "LearnerRegr")
       learner_lm = LearnerRegrLM$new()
       resampling_part_y = rsmp("insample")$instantiate(task_part_y)
       r_part_y = resample(task_part_y, learner_lm, resampling_part_y,
         store_models = TRUE)
       u_hat = as.data.table(r_part_y$prediction())$response

       task_part_d = initiate_task("lm_part_out_d", self$data$data_model,
         target = self$data$treat_col,
         select_cols = c(self$data$x_cols,
           self$data$other_treat_cols),
         "LearnerRegr")
       resampling_part_d = rsmp("insample")$instantiate(task_part_d)
       r_part_d = resample(task_part_d, learner_lm, resampling_part_d,
         store_models = TRUE)
       w_hat = as.data.table(r_part_d$prediction())$response
-    } else {
-      w_hat = d
-      u_hat = y
-    }
-
+
+      data_aux = data.table(self$data$data_model, "w_hat" = w_hat)
+
+      r_hat = dml_cv_predict(self$learner$ml_r,
+        c(
+          self$data$x_cols,
+          self$data$other_treat_cols,
+          self$data$z_cols),
+        "w_hat",
+        data_aux,
+        nuisance_id = "nuis_r",
+        smpls = smpls,
+        est_params = self$get_params("ml_r"),
+        return_train_preds = FALSE,
+        learner_class = private$learner_class$ml_r,
+        fold_specific_params =
+          private$fold_specific_params)
+    }
     if (is.character(self$score)) {
       if (self$score == "partialling out") {
         psi_a = -r_hat * w_hat
@@ -674,7 +689,6 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV",
         params = tuning_result_r$params))
       return(tuning_result)
     },
-
     ml_nuisance_tuning_partialZ = function(smpls, param_set, tune_settings,
       tune_on_folds, ...) {
       if (!tune_on_folds) {
@@ -684,7 +698,7 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV",
         smpls$train_ids,
         function(x) extract_training_data(self$data$data_model, x))
       }
-
+
       tuning_result_r = dml_tune(self$learner$ml_r,
         c(
           self$data$x_cols,

From 267fc2cf157c11ba663cd37b273777863ac65902 Mon Sep 17 00:00:00 2001
From: Philipp Bach
Date: Thu, 22 Jul 2021 17:37:49 +0200
Subject: [PATCH 07/16] fix PLIVpartialZ case with covariates

---
 R/double_ml_pliv.R | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/R/double_ml_pliv.R b/R/double_ml_pliv.R
index 9ba8df29..40ee35d9 100644
--- a/R/double_ml_pliv.R
+++ b/R/double_ml_pliv.R
@@ -441,7 +441,7 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV",
     d = self$data$data_model[[self$data$treat_col]]
     y = self$data$data_model[[self$data$y_col]]

-    if ( (is.null(self$data$x_cols) | length(self$data$x_cols) == 0)) {
+    if (test_character(self$data$x_cols, len = 0)) {
       r_hat = dml_cv_predict(self$learner$ml_r,
         c(
           self$data$x_cols,
           self$data$other_treat_cols,
           self$data$z_cols),
         self$data$treat_col,
         self$data$data_model,
         nuisance_id = "nuis_r",
         smpls = smpls,
         est_params = self$get_params("ml_r"),
         return_train_preds = FALSE,
         learner_class = private$learner_class$ml_r,
         fold_specific_params =
           private$fold_specific_params)
-      w_hat = d
-      u_hat = y
-    }
-    else {
-      # Partial out Xs from y and d based on linear regression
+    } else {
+      # Partial out Xs from y and d by using linear regression
       task_part_y = initiate_task("lm_part_out_y", self$data$data_model,
         target = self$data$y_col,
         select_cols = c(self$data$x_cols,
           self$data$other_treat_cols),
         "LearnerRegr")
       learner_lm = LearnerRegrLM$new()
       resampling_part_y = rsmp("insample")$instantiate(task_part_y)
       r_part_y = resample(task_part_y, learner_lm, resampling_part_y,
         store_models = TRUE)
-      u_hat = as.data.table(r_part_y$prediction())$response
+      y_tilde = y - as.data.table(r_part_y$prediction())$response

       task_part_d = initiate_task("lm_part_out_d", self$data$data_model,
         target = self$data$treat_col,
         select_cols = c(self$data$x_cols,
           self$data$other_treat_cols),
         "LearnerRegr")
       resampling_part_d = rsmp("insample")$instantiate(task_part_d)
       r_part_d = resample(task_part_d, learner_lm, resampling_part_d,
         store_models = TRUE)
-      w_hat = as.data.table(r_part_d$prediction())$response
-
-      data_aux = data.table(self$data$data_model, "w_hat" = w_hat)
-
+      d_tilde = d - as.data.table(r_part_d$prediction())$response
+
+      data_aux = data.table(self$data$data_model, "d_tilde" = d_tilde)
       r_hat = dml_cv_predict(self$learner$ml_r,
         c(
           self$data$x_cols,
           self$data$other_treat_cols,
           self$data$z_cols),
-        "w_hat",
+        "d_tilde",
         data_aux,
         nuisance_id = "nuis_r",
         smpls = smpls,
         est_params = self$get_params("ml_r"),
         return_train_preds = FALSE,
         learner_class = private$learner_class$ml_r,
         fold_specific_params =
           private$fold_specific_params)
     }
     if (is.character(self$score)) {
       if (self$score == "partialling out") {
-        psi_a = -r_hat * w_hat
-        psi_b = r_hat * u_hat
+        if (test_character(self$data$x_cols, len = 0)) {
+          psi_a = -r_hat * d
+          psi_b = r_hat * y
+        } else {
+          psi_a = -r_hat * d_tilde
+          psi_b = r_hat * y_tilde
+        }
       }
       res = list(psi_a = psi_a, psi_b = psi_b)
     } else if (is.function(self$score)) {
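The corrected branch is the Frisch-Waugh-Lovell reduction: y and d are replaced
by their OLS residuals on the covariates before the instrument projection is
learned, and the estimate is again a ratio of moments in the residualized
quantities. A self-contained sketch of the same logic, using plain lm() in
place of the mlr3 machinery above (toy data with one covariate and one
instrument; the true coefficient is 1):

    set.seed(1)
    n = 10000
    x = rnorm(n)
    z = rnorm(n)
    d = 0.8 * z + 0.5 * x + rnorm(n)
    y = 1 * d + 0.5 * x + rnorm(n)
    y_tilde = resid(lm(y ~ x))      # partial out x from the outcome
    d_tilde = resid(lm(d ~ x))      # partial out x from the treatment
    r_hat = fitted(lm(d_tilde ~ z)) # projection on the instrument
    mean(r_hat * y_tilde) / mean(r_hat * d_tilde) # approx. 1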
From 23f59d0de204c44db838393878f81c0217cb35d5 Mon Sep 17 00:00:00 2001
From: Philipp Bach
Date: Thu, 22 Jul 2021 17:59:42 +0200
Subject: [PATCH 08/16] set up tests for PLIV partialZ case with covariates

---
 tests/testthat/helper-03-dgp.R                |  10 ++-
 tests/testthat/helper-04-simdata.R            |  19 +++++-
 .../test-double_ml_pliv_partial_z_with_x.R    |  68 +++++++++++++++++
 3 files changed, 92 insertions(+), 5 deletions(-)
 create mode 100644 tests/testthat/test-double_ml_pliv_partial_z_with_x.R

diff --git a/tests/testthat/helper-03-dgp.R b/tests/testthat/helper-03-dgp.R
index cedf4c78..4757dd92 100644
--- a/tests/testthat/helper-03-dgp.R
+++ b/tests/testthat/helper-03-dgp.R
@@ -133,7 +133,8 @@ dgp1_toeplitz = function(n, p, betamax = 4, decay = 0.99, threshold = 0, noiseva
   return(data)
 }

-make_data_pliv_partialZ = function(n_obs, alpha = 1, dim_x = 5, dim_z = 150) {
+make_data_pliv_partialZ = function(n_obs, alpha = 1, dim_x = 5, dim_z = 150,
+                                   return_x_vars = FALSE) {
   sigma_e_u = matrix(c(1, 0.6, 0.6, 1), ncol = 2)
   mu_e_u = rep(0, 2)
   e_u = mvtnorm::rmvnorm(n = n_obs, mean = mu_e_u, sigma = sigma_e_u)
@@ -160,13 +161,16 @@ make_data_pliv_partialZ = function(n_obs, alpha = 1, dim_x = 5, dim_z = 150) {

   d = x %*% gamma + z %*% delta + u
   y = alpha * d + x %*% beta + epsilon
-
   colnames(x) = paste0("X", 1:dim_x)
   colnames(z) = paste0("Z", 1:dim_z)
   colnames(y) = "y"
   colnames(d) = "d"

-  data = data.frame(x, y, d, z)
+  if (return_x_vars) {
+    data = data.frame(x, y, d, z)
+  } else {
+    data = data.frame(y, d, z)
+  }
   return(data)
 }

diff --git a/tests/testthat/helper-04-simdata.R b/tests/testthat/helper-04-simdata.R
index 744a9ae3..727d63f3 100644
--- a/tests/testthat/helper-04-simdata.R
+++ b/tests/testthat/helper-04-simdata.R
@@ -97,10 +97,25 @@ dim_z = 150
 df = make_data_pliv_partialZ(
   setting$n,
   alpha = setting$theta,
-  dim_x = 5)
-Xnames = names(df)[names(df) %in% c("y", "d", paste0("Z", 1:dim_z)) == FALSE]
+  dim_x = 5,
+  return_x_vars = FALSE)
+Xnames = NULL
 dml_data = double_ml_data_from_data_frame(df,
   y_col = "y",
   d_cols = "d", x_cols = Xnames, z_cols = paste0("Z", 1:dim_z))
 data_pliv_partialZ = list(df = df,
   dml_data = dml_data)
+
+set.seed(1282)
+dim_z = 150
+df = make_data_pliv_partialZ(
+  setting$n,
+  alpha = setting$theta,
+  dim_x = 5,
+  return_x_vars = TRUE)
+Xnames = names(df)[names(df) %in% c("y", "d", paste0("Z", 1:dim_z)) == FALSE]
+dml_data = double_ml_data_from_data_frame(df,
+  y_col = "y",
+  d_cols = "d", x_cols = Xnames, z_cols
= paste0("Z", 1:dim_z)) +data_pliv_partialZ_with_X = list(df = df, + dml_data = dml_data) \ No newline at end of file diff --git a/tests/testthat/test-double_ml_pliv_partial_z_with_x.R b/tests/testthat/test-double_ml_pliv_partial_z_with_x.R new file mode 100644 index 00000000..ac5c5b20 --- /dev/null +++ b/tests/testthat/test-double_ml_pliv_partial_z_with_x.R @@ -0,0 +1,68 @@ +context("Unit tests for PLIV.partialZ with covariates X") + +lgr::get_logger("mlr3")$set_threshold("warn") + +skip_on_cran() + +test_cases = expand.grid( + learner = c("regr.lm", "regr.glmnet"), + dml_procedure = c("dml1", "dml2"), + score = "partialling out", + stringsAsFactors = FALSE) +test_cases["test_name"] = apply(test_cases, 1, paste, collapse = "_") + +patrick::with_parameters_test_that("Unit tests for PLIV.partialZ:", + .cases = test_cases, { + learner_pars = get_default_mlmethod_pliv(learner) + n_rep_boot = 498 + + set.seed(3141) + dim_z = 150 + pliv_hat = dml_pliv_partial_z(data_pliv_partialZ_with_X$df, + y = "y", d = "d", z = paste0("Z", 1:dim_z), + n_folds = 5, + ml_r = learner_pars$ml_r$clone(), + dml_procedure = dml_procedure, score = score) + theta = pliv_hat$coef + se = pliv_hat$se + + boot_theta = bootstrap_pliv_partial_z(pliv_hat$thetas, pliv_hat$ses, + data_pliv_partialZ$df, + y = "y", d = "d", z = paste0("Z", 1:dim_z), + n_folds = 5, smpls = pliv_hat$smpls, + all_preds= pliv_hat$all_preds, + bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef + + set.seed(3141) + double_mlpliv_obj = DoubleMLPLIV.partialZ(data_pliv_partialZ_with_X$dml_data, + ml_r = learner_pars$ml_r$clone(), + n_folds = 5, + score = score, + dml_procedure = dml_procedure) + + double_mlpliv_obj$fit() + theta_obj = double_mlpliv_obj$coef + se_obj = double_mlpliv_obj$se + + # bootstrap + double_mlpliv_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + boot_theta_obj = double_mlpliv_obj$boot_coef + + # at the moment the object result comes without a name + expect_equal(theta, theta_obj, tolerance = 1e-8) + expect_equal(se, se_obj, tolerance = 1e-8) + expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) + } +) + +test_that("Unit tests for PLIV.partialZ invalid score", { + msg = paste("Callable score not implemented for DoubleMLPLIV with", + "partialX=FALSE and partialZ=TRUE.") + double_mlplr_obj <- DoubleMLPLIV.partialZ( + data_pliv_partialZ$dml_data, + ml_r = mlr3::lrn('regr.rpart'), + score = function(x) return(mean(x))) + expect_error(double_mlplr_obj$fit(), + regexp = msg) +} +) From 39753e3ef952103e5a32cbf3ce93bac641853a8e Mon Sep 17 00:00:00 2001 From: Philipp Bach Date: Fri, 23 Jul 2021 11:35:36 +0200 Subject: [PATCH 09/16] fct implementation PLIV, partial Z case with Xs --- tests/testthat/helper-14-dml_pliv_partial_z.R | 78 ++++++++++++++----- 1 file changed, 59 insertions(+), 19 deletions(-) diff --git a/tests/testthat/helper-14-dml_pliv_partial_z.R b/tests/testthat/helper-14-dml_pliv_partial_z.R index 7e292960..ae7f3cc1 100644 --- a/tests/testthat/helper-14-dml_pliv_partial_z.R +++ b/tests/testthat/helper-14-dml_pliv_partial_z.R @@ -11,6 +11,17 @@ dml_pliv_partial_z = function(data, y, d, z, all_thetas = all_ses = rep(NA, n_rep) all_preds = list() + # check whether data contains Xs + x_indx = names(data)[! 
(names(data) %in% c(y,d,z))] + if (length(x_indx) != 0) { + formula_rhs = paste0(x_indx, collapse = " + ") + lm_y = lm(paste0("y ~ ", formula_rhs), data) + data$y_tilde = data[, y] - predict(lm_y) + + lm_d = lm(paste0("d ~ ", formula_rhs), data) + data$d_tilde = data[, d] - predict(lm_d) + } + for (i_rep in 1:n_rep) { this_smpl = smpls[[i_rep]] @@ -23,8 +34,14 @@ dml_pliv_partial_z = function(data, y, d, z, this_smpl, all_preds[[i_rep]]) r_hat = residuals$r_hat - D = data[, d] - Y = data[, y] + + if (all(! (names(data) %in% c("y_tilde", "d_tilde")))) { + D = data[, d] + Y = data[, y] + } else { + D = data[, "d_tilde"] + Y = data[, "y_tilde"] + } # DML 1 if (dml_procedure == "dml1") { @@ -72,7 +89,8 @@ dml_pliv_partial_z = function(data, y, d, z, res = list( coef = theta, se = se, t = t, pval = pval, thetas = all_thetas, ses = all_ses, - all_preds=all_preds, smpls=smpls) + all_preds=all_preds, smpls=smpls, + data_with_res = data) return(res) } @@ -83,24 +101,40 @@ fit_nuisance_pliv_partial_z = function(data, y, d, z, params_r) { train_ids = smpls$train_ids test_ids = smpls$test_ids - - # nuisance r: E[D|X] - r_indx = names(data) != y - data_r = data[, r_indx, drop = FALSE] - task_r = mlr3::TaskRegr$new(id = paste0("nuis_r_", d), backend = data_r, target = d) - if (!is.null(params_r)) { - ml_r$param_set$values = params_r - } - resampling_r = mlr3::rsmp("custom") - resampling_r$instantiate(task_r, train_ids, test_ids) - - r_r = mlr3::resample(task_r, ml_r, resampling_r, store_models = TRUE) - r_hat_list = lapply(r_r$predictions(), function(x) x$response) + if (all(! (names(data) %in% c("y_tilde", "d_tilde")))) { + # case without Xs + + # nuisance r: E[D|X] + r_indx = names(data) != y + data_r = data[, r_indx, drop = FALSE] + task_r = mlr3::TaskRegr$new(id = paste0("nuis_r_", d), backend = data_r, target = d) + if (!is.null(params_r)) { + ml_r$param_set$values = params_r + } + + resampling_r = mlr3::rsmp("custom") + resampling_r$instantiate(task_r, train_ids, test_ids) + + r_r = mlr3::resample(task_r, ml_r, resampling_r, store_models = TRUE) + r_hat_list = lapply(r_r$predictions(), function(x) x$response) + } else { + r_indx = ! (names(data) %in% c(y, d, "y_tilde")) + data_r = data[, r_indx, drop = FALSE] + task_r = mlr3::TaskRegr$new(id = paste0("nuis_r_", d), backend = data_r, target = "d_tilde") + if (!is.null(params_r)) { + ml_r$param_set$values = params_r + } + + resampling_r = mlr3::rsmp("custom") + resampling_r$instantiate(task_r, train_ids, test_ids) + + r_r = mlr3::resample(task_r, ml_r, resampling_r, store_models = TRUE) + r_hat_list = lapply(r_r$predictions(), function(x) x$response) + } all_preds = list( r_hat_list = r_hat_list) - return(all_preds) } @@ -147,9 +181,15 @@ bootstrap_pliv_partial_z = function(theta, se, data, y, d, z, n_folds, smpls, smpls[[i_rep]], all_preds[[i_rep]]) r_hat = residuals$r_hat - D = data[, d] - Y = data[, y] + if (all(! 
(names(data) %in% c("y_tilde", "d_tilde")))) { + D = data[, d] + Y = data[, y] + } else { + D = data[, "d_tilde"] + Y = data[, "y_tilde"] + } + psi = (Y - D * theta[i_rep]) * r_hat psi_a = - r_hat * D From b70911f996720f148d4102991168c333d87b5dcd Mon Sep 17 00:00:00 2001 From: Philipp Bach Date: Fri, 23 Jul 2021 11:35:55 +0200 Subject: [PATCH 10/16] tests for PartialZ with Xs --- tests/testthat/test-double_ml_pliv_partial_z_with_x.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-double_ml_pliv_partial_z_with_x.R b/tests/testthat/test-double_ml_pliv_partial_z_with_x.R index ac5c5b20..1e157b95 100644 --- a/tests/testthat/test-double_ml_pliv_partial_z_with_x.R +++ b/tests/testthat/test-double_ml_pliv_partial_z_with_x.R @@ -25,9 +25,13 @@ patrick::with_parameters_test_that("Unit tests for PLIV.partialZ:", dml_procedure = dml_procedure, score = score) theta = pliv_hat$coef se = pliv_hat$se - + + # data with residuals + data_with_res = pliv_hat$data_with_res + + set.seed(3141) boot_theta = bootstrap_pliv_partial_z(pliv_hat$thetas, pliv_hat$ses, - data_pliv_partialZ$df, + data_with_res, y = "y", d = "d", z = paste0("Z", 1:dim_z), n_folds = 5, smpls = pliv_hat$smpls, all_preds= pliv_hat$all_preds, @@ -45,6 +49,7 @@ patrick::with_parameters_test_that("Unit tests for PLIV.partialZ:", se_obj = double_mlpliv_obj$se # bootstrap + set.seed(3141) double_mlpliv_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) boot_theta_obj = double_mlpliv_obj$boot_coef From 98cbef158c86aaafa1f50f86ad71ef463d57b08b Mon Sep 17 00:00:00 2001 From: Philipp Bach Date: Fri, 23 Jul 2021 12:46:20 +0200 Subject: [PATCH 11/16] adjust test for param passing (PLIV with partialZ, case with / without Xs) --- ...uble_ml_pliv_partial_z_parameter_passing.R | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-double_ml_pliv_partial_z_parameter_passing.R b/tests/testthat/test-double_ml_pliv_partial_z_parameter_passing.R index 87d87799..e2d155a7 100644 --- a/tests/testthat/test-double_ml_pliv_partial_z_parameter_passing.R +++ b/tests/testthat/test-double_ml_pliv_partial_z_parameter_passing.R @@ -8,12 +8,14 @@ test_cases = expand.grid( learner = "regr.rpart", dml_procedure = c("dml1", "dml2"), score = "partialling out", + with_x = c(TRUE, FALSE), stringsAsFactors = FALSE) test_cases_nocf = expand.grid( learner = "regr.rpart", dml_procedure = "dml1", score = "partialling out", + with_x = c(TRUE, FALSE), stringsAsFactors = FALSE) test_cases["test_name"] = apply(test_cases, 1, paste, collapse = "_") @@ -28,6 +30,11 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par learner_pars = get_default_mlmethod_pliv(learner) df = data_pliv$df + if (! with_x) { + x_indx = grep("X", names(df)) + df = df[, - x_indx, drop = FALSE] + } + set.seed(3141) pliv_hat = dml_pliv_partial_z(df, y = "y", d = "d", z = c("z", "z2"), @@ -38,8 +45,15 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par theta = pliv_hat$coef se = pliv_hat$se + if (! 
with_x) { + df_boot = df + } else { + df_boot = pliv_hat$data_with_res + } + + set.seed(3141) boot_theta = bootstrap_pliv_partial_z(pliv_hat$thetas, pliv_hat$ses, - df, + df_boot, y = "y", d = "d", z = c("z", "z2"), n_folds = n_folds, n_rep = n_rep, smpls = pliv_hat$smpls, @@ -69,6 +83,7 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par se_obj = dml_pliv_obj$se # bootstrap + set.seed(3141) dml_pliv_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) boot_theta_obj = dml_pliv_obj$boot_coef @@ -86,6 +101,11 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par learner_pars = get_default_mlmethod_pliv(learner) df = data_pliv$df + if (! with_x) { + x_indx = grep("X", names(df)) + df = df[, - x_indx, drop = FALSE] + } + # Passing for non-cross-fitting case set.seed(3141) my_task = Task$new("help task", "regr", data_pliv$df) @@ -183,7 +203,7 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par } ) -patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.partialXZ (default vs explicit)", +patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.partialZ (default vs explicit)", .cases = test_cases, { n_folds = 2 n_rep = 3 From 9f02ca9a6751bb6ae32aefdc6dcc37044294a146 Mon Sep 17 00:00:00 2001 From: Philipp Bach Date: Fri, 23 Jul 2021 13:06:39 +0200 Subject: [PATCH 12/16] tuning for PartialZ case with/without Xs and adjust tests --- R/double_ml_pliv.R | 67 ++++++++++++++++----- tests/testthat/test-double_ml_pliv_tuning.R | 45 +++++++++++--- 2 files changed, 87 insertions(+), 25 deletions(-) diff --git a/R/double_ml_pliv.R b/R/double_ml_pliv.R index 40ee35d9..e0c46f45 100644 --- a/R/double_ml_pliv.R +++ b/R/double_ml_pliv.R @@ -692,24 +692,59 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV", }, ml_nuisance_tuning_partialZ = function(smpls, param_set, tune_settings, tune_on_folds, ...) 
{ - if (!tune_on_folds) { - data_tune_list = list(self$data$data_model) + + if (test_character(self$data$x_cols, len = 0)) { + if (!tune_on_folds) { + data_tune_list = list(self$data$data_model) + } else { + data_tune_list = lapply( + smpls$train_ids, + function(x) extract_training_data(self$data$data_model, x)) + } + tuning_result_r = dml_tune(self$learner$ml_r, + c( + self$data$x_cols, + self$data$other_treat_cols, + self$data$z_cols), + self$data$treat_col, data_tune_list, + nuisance_id = "nuis_r", + param_set$ml_r, tune_settings, + tune_settings$measure$ml_r, + private$learner_class$ml_r) } else { - data_tune_list = lapply( - smpls$train_ids, - function(x) extract_training_data(self$data$data_model, x)) + # Partial out Xs from d by using linear regression + task_part_d = initiate_task("lm_part_out_d", self$data$data_model, + target = self$data$treat_col, + select_cols = c(self$data$x_cols, + self$data$other_treat_cols), + "LearnerRegr") + resampling_part_d = rsmp("insample")$instantiate(task_part_d) + learner_lm = LearnerRegrLM$new() + r_part_d = resample(task_part_d, learner_lm, resampling_part_d, + store_models = TRUE) + d_tilde = self$data$data_model[[self$data$treat_col]] - + as.data.table(r_part_d$prediction())$response + data_aux = data.table(self$data$data_model, "d_tilde" = d_tilde) + + if (!tune_on_folds) { + data_tune_list = list(data_aux) + } else { + data_tune_list = lapply( + smpls$train_ids, + function(x) extract_training_data(data_aux, x)) + } + + tuning_result_r = dml_tune(self$learner$ml_r, + c( + self$data$x_cols, + self$data$other_treat_cols, + self$data$z_cols), + "d_tilde", data_tune_list, + nuisance_id = "nuis_r", + param_set$ml_r, tune_settings, + tune_settings$measure$ml_r, + private$learner_class$ml_r) } - - tuning_result_r = dml_tune(self$learner$ml_r, - c( - self$data$x_cols, - self$data$other_treat_cols, - self$data$z_cols), - self$data$treat_col, data_tune_list, - nuisance_id = "nuis_r", - param_set$ml_r, tune_settings, - tune_settings$measure$ml_r, - private$learner_class$ml_r) tuning_result = list("ml_r" = list(tuning_result_r, params = tuning_result_r$params)) diff --git a/tests/testthat/test-double_ml_pliv_tuning.R b/tests/testthat/test-double_ml_pliv_tuning.R index 8de9664a..83655158 100644 --- a/tests/testthat/test-double_ml_pliv_tuning.R +++ b/tests/testthat/test-double_ml_pliv_tuning.R @@ -52,7 +52,6 @@ patrick::with_parameters_test_that("Unit tests for tuning of PLIV", .cases = test_cases, { # TBD: Functional Test Case - set.seed(3141) n_folds = 2 n_rep_boot = 498 @@ -70,6 +69,7 @@ patrick::with_parameters_test_that("Unit tests for tuning of PLIV", z_cols = z_vars[[z_indx]] set.seed(3141) df = data_pliv$df + Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] data_ml = double_ml_data_from_data_frame(df, y_col = "y", @@ -127,14 +127,41 @@ patrick::with_parameters_test_that("Unit tests for tuning of PLIV", # } # if (data_ml$n_instr > 1) { + + # Case without X's + set.seed(3141) + data_ml_noX = double_ml_data_from_data_frame(df, + y_col = "y", + d_cols = "d", x_cols = character(0), z_cols = z_cols) + double_mlpliv_obj_tuned_Z_noX = DoubleMLPLIV.partialZ(data_ml_noX, + n_folds = n_folds, + ml_r = learner, + dml_procedure = dml_procedure, + score = score, + n_rep = n_rep) + param_grid_r = list("ml_r" = param_grid[["ml_r"]]) + tune_settings_r = tune_settings + tune_settings_r$measure$ml_g = tune_settings_r$measure$ml_m = NULL + double_mlpliv_obj_tuned_Z_noX$tune( + param_set = param_grid_r, tune_on_folds = tune_on_folds, + tune_settings = 
tune_settings_r) + double_mlpliv_obj_tuned_Z_noX$fit() + + theta_obj_tuned_Z_noX = double_mlpliv_obj_tuned_Z_noX$coef + se_obj_tuned_Z_noX = double_mlpliv_obj_tuned_Z_noX$se + + expect_is(theta_obj_tuned_Z_noX, "numeric") + expect_is(se_obj_tuned_Z_noX, "numeric") + + # Case with X's set.seed(3141) double_mlpliv_obj_tuned_Z = DoubleMLPLIV.partialZ(data_ml, - n_folds = n_folds, - ml_r = learner, - dml_procedure = dml_procedure, - score = score, - n_rep = n_rep) - + n_folds = n_folds, + ml_r = learner, + dml_procedure = dml_procedure, + score = score, + n_rep = n_rep) + param_grid_r = list("ml_r" = param_grid[["ml_r"]]) tune_settings_r = tune_settings tune_settings_r$measure$ml_g = tune_settings_r$measure$ml_m = NULL @@ -142,10 +169,10 @@ patrick::with_parameters_test_that("Unit tests for tuning of PLIV", param_set = param_grid_r, tune_on_folds = tune_on_folds, tune_settings = tune_settings_r) double_mlpliv_obj_tuned_Z$fit() - + theta_obj_tuned_Z = double_mlpliv_obj_tuned_Z$coef se_obj_tuned_Z = double_mlpliv_obj_tuned_Z$se - + expect_is(theta_obj_tuned_Z, "numeric") expect_is(se_obj_tuned_Z, "numeric") From d680675c2240b18512552f9031f24fb975194d1e Mon Sep 17 00:00:00 2001 From: Philipp Bach Date: Fri, 23 Jul 2021 13:17:28 +0200 Subject: [PATCH 13/16] add tests for BCCH2012 DGP --- tests/testthat/test-double_ml_datasets.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/testthat/test-double_ml_datasets.R b/tests/testthat/test-double_ml_datasets.R index 70f27248..38a88e86 100644 --- a/tests/testthat/test-double_ml_datasets.R +++ b/tests/testthat/test-double_ml_datasets.R @@ -39,6 +39,18 @@ patrick::with_parameters_test_that("Unit tests for datasets functionalities:", expect_is(df$d, "matrix") expect_is(df$z, "matrix") } + + # Test BCCH2012 + if (return_type != "matrix") { + df = make_pliv_BCCH2012(return_type = return_type) + expect_is(df, paste0(return_type)) + } else { + df = make_pliv_BCCH2012(return_type = return_type) + expect_is(df, "list") + expect_is(df$y, "matrix") + expect_is(df$d, "matrix") + expect_is(df$z, "matrix") + } # Test IRM if (return_type != "matrix") { From 1bce63966adcc7d240acafd268621cc4249b56c1 Mon Sep 17 00:00:00 2001 From: Philipp Bach Date: Mon, 9 Aug 2021 18:12:05 +0200 Subject: [PATCH 14/16] merge master into p-extensions-iv --- R/double_ml.R | 336 ++++--- R/double_ml_data.R | 97 +- R/double_ml_iivm.R | 29 +- R/double_ml_irm.R | 18 +- R/double_ml_pliv.R | 35 +- R/helper.R | 41 +- tests/testthat/helper-01-helper_functions.R | 10 +- tests/testthat/helper-03-dgp.R | 26 +- tests/testthat/helper-04-simdata.R | 65 +- tests/testthat/helper-05-ml-learner.R | 20 +- tests/testthat/helper-08-dml_plr.R | 248 ++--- tests/testthat/helper-09-dml_pliv.R | 111 ++- tests/testthat/helper-10-dml_irm.R | 124 +-- tests/testthat/helper-11-dml_iivm.R | 131 +-- tests/testthat/helper-12-p_adjust.R | 4 +- tests/testthat/helper-13-dml_pliv_partial_x.R | 128 +-- tests/testthat/helper-14-dml_pliv_partial_z.R | 83 +- .../testthat/helper-15-dml_pliv_partial_xz.R | 137 +-- .../testthat/test-double_ml_active_bindings.R | 96 +- tests/testthat/test-double_ml_data.R | 931 +++++++++--------- .../test-double_ml_data_active_bindings.R | 444 +++++---- tests/testthat/test-double_ml_iivm.R | 16 +- .../test-double_ml_iivm_parameter_passing.R | 217 ++-- tests/testthat/test-double_ml_iivm_trim.R | 34 +- tests/testthat/test-double_ml_irm.R | 16 +- .../test-double_ml_irm_loaded_mlr3learner.R | 6 +- .../test-double_ml_irm_parameter_passing.R | 176 ++-- 
tests/testthat/test-double_ml_irm_trim.R | 30 +- .../testthat/test-double_ml_irm_user_score.R | 4 +- tests/testthat/test-double_ml_pliv.R | 14 +- ...double_ml_pliv_multi_z_parameter_passing.R | 270 ++--- .../test-double_ml_pliv_parameter_passing.R | 291 +++--- ...e_ml_pliv_partial_functional_initializer.R | 44 +- .../testthat/test-double_ml_pliv_partial_x.R | 45 +- .../testthat/test-double_ml_pliv_partial_xz.R | 44 +- ...ble_ml_pliv_partial_xz_parameter_passing.R | 299 +++--- .../testthat/test-double_ml_pliv_partial_z.R | 35 +- ...uble_ml_pliv_partial_z_parameter_passing.R | 163 +-- .../testthat/test-double_ml_pliv_user_score.R | 6 +- tests/testthat/test-double_ml_plr.R | 24 +- .../testthat/test-double_ml_plr_classifier.R | 83 +- .../test-double_ml_plr_exception_handling.R | 8 +- .../test-double_ml_plr_loaded_mlr3learner.R | 8 +- .../testthat/test-double_ml_plr_multitreat.R | 30 +- .../testthat/test-double_ml_plr_nocrossfit.R | 25 +- .../test-double_ml_plr_parameter_passing.R | 288 +++--- .../test-double_ml_plr_rep_cross_fit.R | 36 +- .../testthat/test-double_ml_plr_set_samples.R | 4 +- .../test-double_ml_set_sample_splitting.R | 342 ++++--- 49 files changed, 3024 insertions(+), 2648 deletions(-) diff --git a/R/double_ml.R b/R/double_ml.R index 47a3a773..7580a42d 100644 --- a/R/double_ml.R +++ b/R/double_ml.R @@ -8,188 +8,260 @@ #' #' @family DoubleML DoubleML = R6Class("DoubleML", - active = list( + active = list( #' @field all_coef (`matrix()`) \cr #' Estimates of the causal parameter(s) for the `n_rep` different sample #' splits after calling `fit()`. all_coef = function(value) { - if (missing(value)) return(private$all_coef_) - else stop("can't set field all_coef") + if (missing(value)) { + return(private$all_coef_) + } else { + stop("can't set field all_coef") + } }, #' @field all_dml1_coef (`array()`) \cr #' Estimates of the causal parameter(s) for the `n_rep` different sample #' splits after calling `fit()` with `dml_procedure = "dml1"`. all_dml1_coef = function(value) { - if (missing(value)) return(private$all_dml1_coef_) - else stop("can't set field all_dml1_coef") + if (missing(value)) { + return(private$all_dml1_coef_) + } else { + stop("can't set field all_dml1_coef") + } }, #' @field all_se (`matrix()`) \cr #' Standard errors of the causal parameter(s) for the `n_rep` different #' sample splits after calling `fit()`. all_se = function(value) { - if (missing(value)) return(private$all_se_) - else stop("can't set field all_se") + if (missing(value)) { + return(private$all_se_) + } else { + stop("can't set field all_se") + } }, #' @field apply_cross_fitting (`logical(1)`) \cr #' Indicates whether cross-fitting should be applied. Default is `TRUE`. apply_cross_fitting = function(value) { - if (missing(value)) return(private$apply_cross_fitting_) - else stop("can't set field apply_cross_fitting") + if (missing(value)) { + return(private$apply_cross_fitting_) + } else { + stop("can't set field apply_cross_fitting") + } }, #' @field boot_coef (`matrix()`) \cr #' Bootstrapped coefficients for the causal parameter(s) after calling #' `fit()` and `bootstrap()`. boot_coef = function(value) { - if (missing(value)) return(private$boot_coef_) - else stop("can't set field boot_coef") + if (missing(value)) { + return(private$boot_coef_) + } else { + stop("can't set field boot_coef") + } }, #' @field boot_t_stat (`matrix()`) \cr #' Bootstrapped t-statistics for the causal parameter(s) after calling #' `fit()` and `bootstrap()`. 
boot_t_stat = function(value) { - if (missing(value)) return(private$boot_t_stat_) - else stop("can't set field boot_t_stat") + if (missing(value)) { + return(private$boot_t_stat_) + } else { + stop("can't set field boot_t_stat") + } }, #' @field coef (`numeric()`) \cr #' Estimates for the causal parameter(s) after calling `fit()`. coef = function(value) { - if (missing(value)) return(private$coef_) - else stop("can't set field coef") + if (missing(value)) { + return(private$coef_) + } else { + stop("can't set field coef") + } }, #' @field data ([`data.table`][data.table::data.table()])\cr #' Data object. data = function(value) { - if (missing(value)) return(private$data_) - else stop("can't set field data") + if (missing(value)) { + return(private$data_) + } else { + stop("can't set field data") + } }, #' @field dml_procedure (`character(1)`) \cr #' A `character()` (`"dml1"` or `"dml2"`) specifying the double machine #' learning algorithm. Default is `"dml2"`. dml_procedure = function(value) { - if (missing(value)) return(private$dml_procedure_) - else stop("can't set field dml_procedure") + if (missing(value)) { + return(private$dml_procedure_) + } else { + stop("can't set field dml_procedure") + } }, #' @field draw_sample_splitting (`logical(1)`) \cr #' Indicates whether the sample splitting should be drawn during #' initialization of the object. Default is `TRUE`. draw_sample_splitting = function(value) { - if (missing(value)) return(private$draw_sample_splitting_) - else stop("can't set field draw_sample_splitting") + if (missing(value)) { + return(private$draw_sample_splitting_) + } else { + stop("can't set field draw_sample_splitting") + } }, #' @field learner (named `list()`) \cr #' The machine learners for the nuisance functions. learner = function(value) { - if (missing(value)) return(private$learner_) - else stop("can't set field learner") + if (missing(value)) { + return(private$learner_) + } else { + stop("can't set field learner") + } }, #' @field n_folds (`integer(1)`) \cr #' Number of folds. Default is `5`. n_folds = function(value) { - if (missing(value)) return(private$n_folds_) - else stop("can't set field n_folds") + if (missing(value)) { + return(private$n_folds_) + } else { + stop("can't set field n_folds") + } }, #' @field n_rep (`integer(1)`) \cr #' Number of repetitions for the sample splitting. Default is `1`. n_rep = function(value) { - if (missing(value)) return(private$n_rep_) - else stop("can't set field n_rep") + if (missing(value)) { + return(private$n_rep_) + } else { + stop("can't set field n_rep") + } }, #' @field params (named `list()`) \cr #' The hyperparameters of the learners. params = function(value) { - if (missing(value)) return(private$params_) - else stop("can't set field params") + if (missing(value)) { + return(private$params_) + } else { + stop("can't set field params") + } }, #' @field psi (`array()`) \cr #' Value of the score function #' \eqn{\psi(W;\theta, \eta)=\psi_a(W;\eta) \theta + \psi_b (W; \eta)} #' after calling `fit()`. - psi = function(value) { - if (missing(value)) return(private$psi_) - else stop("can't set field psi") + psi = function(value) { + if (missing(value)) { + return(private$psi_) + } else { + stop("can't set field psi") + } }, #' @field psi_a (`array()`) \cr #' Value of the score function component \eqn{\psi_a(W;\eta)} after #' calling `fit()`. 
- psi_a = function(value) { - if (missing(value)) return(private$psi_a_) - else stop("can't set field psi_a") + psi_a = function(value) { + if (missing(value)) { + return(private$psi_a_) + } else { + stop("can't set field psi_a") + } }, #' @field psi_b (`array()`) \cr #' Value of the score function component \eqn{\psi_b(W;\eta)} after #' calling `fit()`. - psi_b = function(value) { - if (missing(value)) return(private$psi_b_) - else stop("can't set field psi_b") + psi_b = function(value) { + if (missing(value)) { + return(private$psi_b_) + } else { + stop("can't set field psi_b") + } }, #' @field predictions (`array()`) \cr #' Predictions of the nuisance models after calling #' `fit(store_predictions=TRUE)`. - predictions = function(value) { - if (missing(value)) return(private$predictions_) - else stop("can't set field predictions") + predictions = function(value) { + if (missing(value)) { + return(private$predictions_) + } else { + stop("can't set field predictions") + } }, #' @field pval (`numeric()`) \cr #' p-values for the causal parameter(s) after calling `fit()`. - pval = function(value) { - if (missing(value)) return(private$pval_) - else stop("can't set field pval") + pval = function(value) { + if (missing(value)) { + return(private$pval_) + } else { + stop("can't set field pval") + } }, #' @field score (`character(1)`, `function()`) \cr #' A `character(1)` or `function()` specifying the score function. - score = function(value) { - if (missing(value)) return(private$score_) - else stop("can't set field score") + score = function(value) { + if (missing(value)) { + return(private$score_) + } else { + stop("can't set field score") + } }, #' @field se (`numeric()`) \cr #' Standard errors for the causal parameter(s) after calling `fit()`. - se = function(value) { - if (missing(value)) return(private$se_) - else stop("can't set field se") + se = function(value) { + if (missing(value)) { + return(private$se_) + } else { + stop("can't set field se") + } }, #' @field smpls (`list()`) \cr #' The partition used for cross-fitting. - smpls = function(value) { - if (missing(value)) return(private$smpls_) - else stop("can't set field smpls") + smpls = function(value) { + if (missing(value)) { + return(private$smpls_) + } else { + stop("can't set field smpls") + } }, #' @field t_stat (`numeric()`) \cr #' t-statistics for the causal parameter(s) after calling `fit()`. - t_stat = function(value) { - if (missing(value)) return(private$t_stat_) - else stop("can't set field t_stat") + t_stat = function(value) { + if (missing(value)) { + return(private$t_stat_) + } else { + stop("can't set field t_stat") + } }, #' @field tuning_res (named `list()`) \cr #' Results from hyperparameter tuning. - tuning_res = function(value) { - if (missing(value)) return(private$tuning_res_) - else stop("can't set field tuning_res") + tuning_res = function(value) { + if (missing(value)) { + return(private$tuning_res_) + } else { + stop("can't set field tuning_res") + } }), - public = list( + public = list( #' @description #' DoubleML is an abstract class that can't be initialized. initialize = function() { @@ -237,18 +309,18 @@ DoubleML = R6Class("DoubleML", "No. 
repeated sample splits: ", self$n_rep, "\n", "Apply cross-fitting: ", self$apply_cross_fitting, "\n") cat(header, "\n", - "\n------------------ Data summary ------------------\n", - data_info, - "\n------------------ Score & algorithm ------------------\n", - score_info, - "\n------------------ Machine learner ------------------\n", - learner_info, - "\n------------------ Resampling ------------------\n", - resampling_info, - "\n------------------ Fit summary ------------------\n ", - sep = "") + "\n------------------ Data summary ------------------\n", + data_info, + "\n------------------ Score & algorithm ------------------\n", + score_info, + "\n------------------ Machine learner ------------------\n", + learner_info, + "\n------------------ Resampling ------------------\n", + resampling_info, + "\n------------------ Fit summary ------------------\n ", + sep = "") self$summary() - + invisible(self) }, @@ -423,7 +495,7 @@ DoubleML = R6Class("DoubleML", #' (length of `train_ids` and `test_ids` is set as `n_folds`). #' #' @return self - #' + #' #' @examples #' library(DoubleML) #' library(mlr3) @@ -431,17 +503,17 @@ DoubleML = R6Class("DoubleML", #' obj_dml_data = make_plr_CCDDHNR2018(n_obs=10) #' dml_plr_obj = DoubleMLPLR$new(obj_dml_data, #' lrn("regr.rpart"), lrn("regr.rpart")) - #' + #' #' # simple sample splitting with two folds and without cross-fitting #' smpls = list(list(train_ids = list(c(1, 2, 3, 4, 5)), #' test_ids = list(c(6, 7, 8, 9, 10)))) #' dml_plr_obj$set_sample_splitting(smpls) - #' + #' #' # sample splitting with two folds and cross-fitting but no repeated cross-fitting #' smpls = list(list(train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), #' test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5)))) #' dml_plr_obj$set_sample_splitting(smpls) - #' + #' #' # sample splitting with two folds and repeated cross-fitting with n_rep = 2 #' smpls = list(list(train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), #' test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5))), @@ -449,30 +521,29 @@ DoubleML = R6Class("DoubleML", #' test_ids = list(c(2, 4, 6, 8, 10), c(1, 3, 5, 7, 9)))) #' dml_plr_obj$set_sample_splitting(smpls) set_sample_splitting = function(smpls) { - if (test_list(smpls, names = "unnamed")) { lapply(smpls, function(x) check_smpl_split(x, self$data$n_obs)) - + n_folds_each_train_smpl = vapply( smpls, function(x) length(x$train_ids), integer(1L)) n_folds_each_test_smpl = vapply( smpls, function(x) length(x$test_ids), integer(1L)) - + if (!all(n_folds_each_train_smpl == n_folds_each_train_smpl[1])) { stop("Different number of folds for repeated cross-fitting.") } - + smpls_are_partitions = vapply( smpls, function(x) check_is_partition(x$test_ids, self$data$n_obs), - FUN.VALUE=TRUE) - + FUN.VALUE = TRUE) + if (all(smpls_are_partitions)) { if (length(smpls) == 1 & - n_folds_each_train_smpl[1] == 1 & - check_is_partition(smpls[[1]]$train_ids, self$data$n_obs)) { + n_folds_each_train_smpl[1] == 1 & + check_is_partition(smpls[[1]]$train_ids, self$data$n_obs)) { private$n_rep_ = 1 private$n_folds_ = 1 private$apply_cross_fitting_ = FALSE @@ -481,27 +552,35 @@ DoubleML = R6Class("DoubleML", private$n_rep_ = length(smpls) private$n_folds_ = n_folds_each_train_smpl[1] private$apply_cross_fitting_ = TRUE - lapply(smpls, - function(x) check_smpl_split(x, self$data$n_obs, - check_intersect = TRUE)) + lapply( + smpls, + function(x) { + check_smpl_split(x, self$data$n_obs, + check_intersect = TRUE) + }) private$smpls_ = smpls } } else { if (n_folds_each_train_smpl[1] != 1) { - 
stop(paste("Invalid partition provided.", - "Tuples (train_ids, test_ids) for more than one fold", - "provided that don't form a partition.")) + stop(paste( + "Invalid partition provided.", + "Tuples (train_ids, test_ids) for more than one fold", + "provided that don't form a partition.")) } if (length(smpls) != 1) { - stop(paste("Repeated sample splitting without cross-fitting not", - "implemented.")) + stop(paste( + "Repeated sample splitting without cross-fitting not", + "implemented.")) } private$n_rep_ = length(smpls) private$n_folds_ = 2 private$apply_cross_fitting_ = FALSE - lapply(smpls, - function(x) check_smpl_split(x, self$data$n_obs, - check_intersect = TRUE)) + lapply( + smpls, + function(x) { + check_smpl_split(x, self$data$n_obs, + check_intersect = TRUE) + }) private$smpls_ = smpls } } else { @@ -517,19 +596,20 @@ DoubleML = R6Class("DoubleML", private$n_folds_ = n_folds private$apply_cross_fitting_ = TRUE check_smpl_split(smpls, self$data$n_obs, - check_intersect = TRUE) + check_intersect = TRUE) private$smpls_ = list(smpls) } } else { if (n_folds != 1) { - stop(paste("Invalid partition provided.", - "Tuples (train_ids, test_ids) for more than one fold", - "provided that don't form a partition.")) + stop(paste( + "Invalid partition provided.", + "Tuples (train_ids, test_ids) for more than one fold", + "provided that don't form a partition.")) } private$n_folds_ = 2 private$apply_cross_fitting_ = FALSE check_smpl_split(smpls, self$data$n_obs, - check_intersect = TRUE) + check_intersect = TRUE) private$smpls_ = list(smpls) } } @@ -690,7 +770,7 @@ DoubleML = R6Class("DoubleML", message("fit() not yet called.") } else { k = length(self$coef) - table = matrix(NA, ncol = 4, nrow = k) + table = matrix(NA_real_, ncol = 4, nrow = k) rownames(table) = names(self$coef) colnames(table) = c("Estimate.", "Std. 
Error", "t value", "Pr(>|t|)") table[, 1] = self$coef @@ -700,9 +780,9 @@ DoubleML = R6Class("DoubleML", private$summary_table = table if (length(k)) { - print(paste( + cat( "Estimates and significance testing of the", - "effect of target variables")) + "effect of target variables\n") res = as.matrix(printCoefmat(private$summary_table, digits = digits, P.values = TRUE, @@ -755,9 +835,9 @@ DoubleML = R6Class("DoubleML", a = c(a, 1 - a) pct = format.perc(a, 3) fac = qnorm(a) - ci = array(NA_real_, dim = c(length(parm), 2L), dimnames = list( - parm, - pct)) + ci = array(NA_real_, + dim = c(length(parm), 2L), + dimnames = list(parm, pct)) ci[] = self$coef[parm] + self$se[parm] %o% fac } @@ -766,7 +846,9 @@ DoubleML = R6Class("DoubleML", a = (1 - level) ab = c(a / 2, 1 - a / 2) pct = format.perc(ab, 3) - ci = array(NA, dim = c(length(parm), 2L), dimnames = list(parm, pct)) + ci = array(NA_real_, + dim = c(length(parm), 2L), + dimnames = list(parm, pct)) if (all(is.na(self$boot_coef))) { stop(paste( @@ -970,8 +1052,8 @@ DoubleML = R6Class("DoubleML", t_stat_ = NULL, tuning_res_ = NULL, n_rep_boot = NULL, - i_rep = NA, - i_treat = NA, + i_rep = NA_integer_, + i_treat = NA_integer_, fold_specific_params = NULL, summary_table = NULL, learner_class = list(), @@ -1168,29 +1250,31 @@ DoubleML = R6Class("DoubleML", }, initialize_arrays = function() { - private$psi_ = array(NA, dim = c( + private$psi_ = array(NA_real_, dim = c( self$data$n_obs, self$n_rep, self$data$n_treat)) - private$psi_a_ = array(NA, dim = c( + private$psi_a_ = array(NA_real_, dim = c( self$data$n_obs, self$n_rep, self$data$n_treat)) - private$psi_b_ = array(NA, dim = c( + private$psi_b_ = array(NA_real_, dim = c( self$data$n_obs, self$n_rep, self$data$n_treat)) - private$coef_ = array(NA, dim = c(self$data$n_treat)) - private$se_ = array(NA, dim = c(self$data$n_treat)) + private$coef_ = array(NA_real_, dim = c(self$data$n_treat)) + private$se_ = array(NA_real_, dim = c(self$data$n_treat)) - private$all_coef_ = array(NA, dim = c(self$data$n_treat, self$n_rep)) - private$all_se_ = array(NA, dim = c(self$data$n_treat, self$n_rep)) + private$all_coef_ = array(NA_real_, + dim = c(self$data$n_treat, self$n_rep)) + private$all_se_ = array(NA_real_, + dim = c(self$data$n_treat, self$n_rep)) if (self$dml_procedure == "dml1") { if (self$apply_cross_fitting) { - private$all_dml1_coef_ = array(NA, dim = c( + private$all_dml1_coef_ = array(NA_real_, dim = c( self$data$n_treat, self$n_rep, self$n_folds)) } else { - private$all_dml1_coef_ = array(NA, dim = c( + private$all_dml1_coef_ = array(NA_real_, dim = c( self$data$n_treat, self$n_rep, 1)) } @@ -1198,17 +1282,17 @@ DoubleML = R6Class("DoubleML", }, initialize_boot_arrays = function(n_rep_boot) { private$n_rep_boot = n_rep_boot - private$boot_coef_ = array(NA, dim = c( + private$boot_coef_ = array(NA_real_, dim = c( self$data$n_treat, n_rep_boot * self$n_rep)) - private$boot_t_stat_ = array(NA, dim = c( + private$boot_t_stat_ = array(NA_real_, dim = c( self$data$n_treat, n_rep_boot * self$n_rep)) }, initialize_predictions = function() { private$predictions_ = sapply(self$params_names(), function(key) { - array(NA, dim = c( + array(NA_real_, dim = c( self$data$n_obs, self$n_rep, self$data$n_treat)) }, @@ -1243,7 +1327,7 @@ DoubleML = R6Class("DoubleML", if (dml_procedure == "dml1") { # Note that length(test_ids) is only not equal to self.n_folds # if self$apply_cross_fitting ==False - thetas = rep(NA, length(test_ids)) + thetas = rep(NA_real_, length(test_ids)) for (i_fold in 
seq_len(length(test_ids))) { test_index = test_ids[[i_fold]] thetas[i_fold] = private$orth_est(inds = test_index) @@ -1286,7 +1370,7 @@ DoubleML = R6Class("DoubleML", } private$se_ = sqrt(apply( n_obs * self$all_se^2 + (self$all_coef - self$coef)^2, 1, - function(x) median(x, na.rm = TRUE))/n_obs) + function(x) median(x, na.rm = TRUE)) / n_obs) invisible(self) }, @@ -1304,10 +1388,10 @@ DoubleML = R6Class("DoubleML", } if (self$apply_cross_fitting) { - J = mean(private$get__psi_a()) - boot_coef = weights %*% private$get__psi() / (n_obs * J) - boot_t_stat = weights %*% private$get__psi() / - (n_obs * private$get__all_se() * J) + J = mean(private$get__psi_a()) + boot_coef = weights %*% private$get__psi() / (n_obs * J) + boot_t_stat = weights %*% private$get__psi() / + (n_obs * private$get__all_se() * J) } else { J = mean(private$get__psi_a()[test_index]) boot_coef = weights %*% private$get__psi()[test_index] / diff --git a/R/double_ml_data.R b/R/double_ml_data.R index 1f8a06da..6b987542 100644 --- a/R/double_ml_data.R +++ b/R/double_ml_data.R @@ -25,15 +25,19 @@ DoubleMLData = R6Class("DoubleMLData", #' @field all_variables (`character()`)\cr #' All variables available in the dataset. all_variables = function(value) { - if (missing(value)) return(names(self$data)) - else stop("can't set field all_variables") + if (missing(value)) { + return(names(self$data)) + } else { + stop("can't set field all_variables") + } }, #' @field d_cols (`character()`)\cr #' The treatment variable(s). d_cols = function(value) { - if (missing(value)) return(private$d_cols_) - else { + if (missing(value)) { + return(private$d_cols_) + } else { d_cols = value # to get more meaningful assert error messages reset_value = !is.null(self$data_model) assert_character(d_cols, unique = TRUE) @@ -49,37 +53,52 @@ DoubleMLData = R6Class("DoubleMLData", #' @field data ([`data.table`][data.table::data.table()])\cr #' Data object. data = function(value) { - if (missing(value)) return(private$data_) - else stop("can't set field data") + if (missing(value)) { + return(private$data_) + } else { + stop("can't set field data") + } }, #' @field data_model ([`data.table`][data.table::data.table()])\cr #' Internal data object that implements the causal model as specified by #' the user via `y_col`, `d_cols`, `x_cols` and `z_cols`. data_model = function(value) { - if (missing(value)) return(private$data_model_) - else stop("can't set field data_model") + if (missing(value)) { + return(private$data_model_) + } else { + stop("can't set field data_model") + } }, #' @field n_instr (`NULL`, `integer(1)`) \cr #' The number of instruments. n_instr = function(value) { - if (missing(value)) return(length(self$z_cols)) - else stop("can't set field n_instr") + if (missing(value)) { + return(length(self$z_cols)) + } else { + stop("can't set field n_instr") + } }, #' @field n_obs (`integer(1)`) \cr #' The number of observations. n_obs = function(value) { - if (missing(value)) return(dim(self$data)[1]) - else stop("can't set field n_obs") + if (missing(value)) { + return(dim(self$data)[1]) + } else { + stop("can't set field n_obs") + } }, #' @field n_treat (`integer(1)`) \cr #' The umber of treatment variables. 
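# ---------------------------------------------------------------------------
# Editor's aside (illustrative sketch, not part of the patch): the active
# bindings in this class are read-only -- reading returns the private field,
# while any assignment stops with "can't set field ...". A minimal, hedged
# sketch of that behaviour; the object name `dml_data` and the toy data
# frame are chosen purely for illustration.
library(DoubleML)
set.seed(123)
df = data.frame(y = rnorm(100), d = rnorm(100), X1 = rnorm(100))
dml_data = double_ml_data_from_data_frame(df,
  y_col = "y", d_cols = "d", x_cols = "X1")
dml_data$n_obs              # 100, i.e. dim(dml_data$data)[1]
dml_data$n_treat            # 1, i.e. length(dml_data$d_cols)
try(dml_data$n_treat <- 2)  # errors: "can't set field n_treat"
# ---------------------------------------------------------------------------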
n_treat = function(value) { - if (missing(value)) return(length(self$d_cols)) - else stop("can't set field n_treat") + if (missing(value)) { + return(length(self$d_cols)) + } else { + stop("can't set field n_treat") + } }, #' @field other_treat_cols (`NULL`, `character()`) \cr @@ -89,23 +108,30 @@ DoubleMLData = R6Class("DoubleMLData", #' the fitting stage. If `use_other_treat_as_covariate` is `FALSE`, #' `other_treat_cols` is `NULL`. other_treat_cols = function(value) { - if (missing(value)) return(private$other_treat_cols_) - else stop("can't set field other_treat_cols") + if (missing(value)) { + return(private$other_treat_cols_) + } else { + stop("can't set field other_treat_cols") + } }, #' @field treat_col (`character(1)`) \cr #' "Active" treatment variable in the multiple-treatment case. treat_col = function(value) { - if (missing(value)) return(private$treat_col_) - else stop("can't set field treat_col") + if (missing(value)) { + return(private$treat_col_) + } else { + stop("can't set field treat_col") + } }, #' @field use_other_treat_as_covariate (`logical(1)`) \cr #' Indicates whether in the multiple-treatment case the other treatment #' variables should be added as covariates. Default is `TRUE`. use_other_treat_as_covariate = function(value) { - if (missing(value)) return(private$use_other_treat_as_covariate_) - else { + if (missing(value)) { + return(private$use_other_treat_as_covariate_) + } else { use_other_treat_as_covariate = value # to get more meaningful assert error messages reset_value = !is.null(self$data_model) assert_logical(use_other_treat_as_covariate, len = 1) @@ -123,8 +149,9 @@ DoubleMLData = R6Class("DoubleMLData", #' `d_cols`, nor as instrumental variables `z_cols` are used as covariates. #' Default is `NULL`. x_cols = function(value) { - if (missing(value)) return(private$x_cols_) - else { + if (missing(value)) { + return(private$x_cols_) + } else { x_cols = value # to get more meaningful assert error messages reset_value = !is.null(self$data_model) if (!is.null(x_cols)) { @@ -153,8 +180,9 @@ DoubleMLData = R6Class("DoubleMLData", #' @field y_col (`character(1)`) \cr #' The outcome variable. y_col = function(value) { - if (missing(value)) return(private$y_col_) - else { + if (missing(value)) { + return(private$y_col_) + } else { y_col = value # to get more meaningful assert error messages reset_value = !is.null(self$data_model) assert_character(y_col, len = 1) @@ -170,8 +198,9 @@ DoubleMLData = R6Class("DoubleMLData", #' @field z_cols (`NULL`, `character()`) \cr #' The instrumental variables. Default is `NULL`. z_cols = function(value) { - if (missing(value)) return(private$z_cols_) - else { + if (missing(value)) { + return(private$z_cols_) + } else { z_cols = value # to get more meaningful assert error messages reset_value = !is.null(self$data_model) if (!is.null(z_cols)) { @@ -239,7 +268,7 @@ DoubleMLData = R6Class("DoubleMLData", invisible(self) }, - + #' @description #' Print DoubleMLData objects. print = function() { @@ -252,10 +281,10 @@ DoubleMLData = R6Class("DoubleMLData", "Instrument(s): ", paste0(self$z_cols, collapse = ", "), "\n", "No. 
Observations: ", self$n_obs, "\n") cat(header, "\n", - "\n------------------ Data summary ------------------\n", - data_info, - sep = "") - + "\n------------------ Data summary ------------------\n", + data_info, + sep = "") + invisible(self) }, @@ -395,9 +424,9 @@ double_ml_data_from_data_frame = function(df, x_cols = NULL, y_col = NULL, d_cols = NULL, z_cols = NULL, use_other_treat_as_covariate = TRUE) { data = DoubleMLData$new(df, - x_cols = x_cols, y_col = y_col, d_cols = d_cols, - z_cols = z_cols, - use_other_treat_as_covariate = use_other_treat_as_covariate) + x_cols = x_cols, y_col = y_col, d_cols = d_cols, + z_cols = z_cols, + use_other_treat_as_covariate = use_other_treat_as_covariate) return(data) } diff --git a/R/double_ml_iivm.R b/R/double_ml_iivm.R index ddaf9f2b..5655b7c8 100644 --- a/R/double_ml_iivm.R +++ b/R/double_ml_iivm.R @@ -103,24 +103,33 @@ DoubleMLIIVM = R6Class("DoubleMLIIVM", #' always takers in the sample. The entry `never_takers` (`logical(1)`) #' speficies whether there are never takers in the sample. subgroups = function(value) { - if (missing(value)) return(private$subgroups_) - else stop("can't set field subgroups") + if (missing(value)) { + return(private$subgroups_) + } else { + stop("can't set field subgroups") + } }, #' @field trimming_rule (`character(1)`) \cr #' A `character(1)` specifying the trimming approach. trimming_rule = function(value) { - if (missing(value)) return(private$trimming_rule_) - else stop("can't set field trimming_rule") + if (missing(value)) { + return(private$trimming_rule_) + } else { + stop("can't set field trimming_rule") + } }, #' @field trimming_threshold (`numeric(1)`) \cr #' The threshold used for timming. trimming_threshold = function(value) { - if (missing(value)) return(private$trimming_threshold_) - else stop("can't set field trimming_threshold") + if (missing(value)) { + return(private$trimming_threshold_) + } else { + stop("can't set field trimming_threshold") + } }), - + public = list( #' @description #' Creates a new instance of this R6 class. @@ -229,7 +238,7 @@ DoubleMLIIVM = R6Class("DoubleMLIIVM", dml_procedure, draw_sample_splitting, apply_cross_fitting) - + private$check_data(self$data) private$check_score(self$score) private$learner_class = list( @@ -484,7 +493,7 @@ DoubleMLIIVM = R6Class("DoubleMLIIVM", "needs to be specified as treatment variable.") if (one_treat) { binary_treat = test_integerish(obj_dml_data$data[[obj_dml_data$d_cols]], - lower = 0, upper = 1) + lower = 0, upper = 1) if (!(one_treat & binary_treat)) { stop(err_msg) } @@ -500,7 +509,7 @@ DoubleMLIIVM = R6Class("DoubleMLIIVM", "needs to be specified as instrumental variable.") if (one_instr) { binary_instr = test_integerish(obj_dml_data$data[[obj_dml_data$z_cols]], - lower = 0, upper = 1) + lower = 0, upper = 1) if (!(one_instr & binary_instr)) { stop(err_msg) } diff --git a/R/double_ml_irm.R b/R/double_ml_irm.R index d4bb7617..866491fb 100644 --- a/R/double_ml_irm.R +++ b/R/double_ml_irm.R @@ -77,20 +77,26 @@ #' #' @export DoubleMLIRM = R6Class("DoubleMLIRM", - inherit = DoubleML, + inherit = DoubleML, active = list( #' @field trimming_rule (`character(1)`) \cr #' A `character(1)` specifying the trimming approach. trimming_rule = function(value) { - if (missing(value)) return(private$trimming_rule_) - else stop("can't set field trimming_rule") + if (missing(value)) { + return(private$trimming_rule_) + } else { + stop("can't set field trimming_rule") + } }, #' @field trimming_threshold (`numeric(1)`) \cr #' The threshold used for timming. 
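# ---------------------------------------------------------------------------
# Editor's aside (illustrative sketch, not part of the patch): the trimming
# threshold keeps estimated propensity scores away from 0 and 1 before they
# enter the score denominators. Assuming truncation-style clipping (the way
# the package-internal trim_vec helper is applied in the test helpers later
# in this patch), a hypothetical stand-alone helper would look like this:
trim_propensity = function(m_hat, trimming_threshold = 1e-12) {
  # clip each propensity estimate into [threshold, 1 - threshold]
  pmin(pmax(m_hat, trimming_threshold), 1 - trimming_threshold)
}
trim_propensity(c(0, 0.003, 0.5, 0.999), trimming_threshold = 0.01)
# returns 0.01 0.01 0.50 0.99
# ---------------------------------------------------------------------------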
trimming_threshold = function(value) { - if (missing(value)) return(private$trimming_threshold_) - else stop("can't set field trimming_threshold") + if (missing(value)) { + return(private$trimming_threshold_) + } else { + stop("can't set field trimming_threshold") + } }), public = list( @@ -381,7 +387,7 @@ DoubleMLIRM = R6Class("DoubleMLIRM", "needs to be specified as treatment variable.") if (one_treat) { binary_treat = test_integerish(obj_dml_data$data[[obj_dml_data$d_cols]], - lower = 0, upper = 1) + lower = 0, upper = 1) if (!(one_treat & binary_treat)) { stop(err_msg) } diff --git a/R/double_ml_pliv.R b/R/double_ml_pliv.R index e0c46f45..219b4849 100644 --- a/R/double_ml_pliv.R +++ b/R/double_ml_pliv.R @@ -74,17 +74,23 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV", #' @field partialX (`logical(1)`) \cr #' Indicates whether covariates \eqn{X} should be partialled out. partialX = function(value) { - if (missing(value)) return(private$partialX_) - else stop("can't set field partialX") + if (missing(value)) { + return(private$partialX_) + } else { + stop("can't set field partialX") + } }, #' @field partialZ (`logical(1)`) \cr #' Indicates whether instruments \eqn{Z} should be partialled out. partialZ = function(value) { - if (missing(value)) return(private$partialZ_) - else stop("can't set field partialZ") + if (missing(value)) { + return(private$partialZ_) + } else { + stop("can't set field partialZ") + } }), - + public = list( #' @description #' Creates a new instance of this R6 class. @@ -182,7 +188,7 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV", dml_procedure, draw_sample_splitting, apply_cross_fitting) - + private$check_data(self$data) private$check_score(self$score) assert_logical(partialX, len = 1) @@ -357,8 +363,9 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV", psi_b = psi_b) } else if (is.function(self$score)) { if (self$data$n_instr > 1) { - stop(paste("Callable score not implemented for DoubleMLPLIV with", - "partialX=TRUE and partialZ=FALSE with several instruments.")) + stop(paste( + "Callable score not implemented for DoubleMLPLIV with", + "partialX=TRUE and partialZ=FALSE with several instruments.")) } psis = self$score(y, z, d, g_hat, m_hat, r_hat, smpls) } @@ -393,7 +400,7 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV", m_hat = m_hat_list$preds data_aux_list = lapply(m_hat_list$train_preds, function(x) { setnafill(data.table(self$data$data_model, "m_hat_on_train" = x), - fill = -9999.99) # mlr3 does not allow NA's (values are not used) + fill = -9999.99) # mlr3 does not allow NA's (values are not used) }) m_hat_tilde = dml_cv_predict(self$learner$ml_r, @@ -424,8 +431,9 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV", psi_a = psi_a, psi_b = psi_b) } else if (is.function(self$score)) { - stop(paste("Callable score not implemented for DoubleMLPLIV", - "with partialX=TRUE and partialZ=TRUE.")) + stop(paste( + "Callable score not implemented for DoubleMLPLIV", + "with partialX=TRUE and partialZ=TRUE.")) # res = self$score(y, d, g_hat, m_hat, m_hat_tilde) } res$preds = list( @@ -507,8 +515,9 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV", } res = list(psi_a = psi_a, psi_b = psi_b) } else if (is.function(self$score)) { - stop(paste("Callable score not implemented for DoubleMLPLIV", - "with partialX=FALSE and partialZ=TRUE.")) + stop(paste( + "Callable score not implemented for DoubleMLPLIV", + "with partialX=FALSE and partialZ=TRUE.")) # res = self$score(y, z, d, r_hat) } res$preds = list("ml_r" = r_hat) diff --git a/R/helper.R b/R/helper.R index d6360e00..e5c36f81 100644 --- a/R/helper.R +++ b/R/helper.R @@ 
-173,7 +173,7 @@ extract_prediction = function(obj_resampling, learner_class, n_obs, 1:n_iters, function(x) as.data.table(obj_resampling$predictions("train")[[x]])) for (i_iter in 1:n_iters) { - preds_vec = as.numeric(rep(NA, n_obs)) + preds_vec = rep(NA_real_, n_obs) f_hat = f_hat_list[[i_iter]] preds_vec[f_hat[[ind_name]]] = f_hat[[resp_name]] preds[[i_iter]] = preds_vec @@ -189,7 +189,7 @@ extract_prediction = function(obj_resampling, learner_class, n_obs, } } } else { - preds = as.numeric(rep(NA, n_obs)) + preds = rep(NA_real_, n_obs) if (testR6(obj_resampling, classes = "ResampleResult")) obj_resampling = list(obj_resampling) n_obj_rsmp = length(obj_resampling) for (i_obj_rsmp in 1:n_obj_rsmp) { @@ -229,8 +229,9 @@ initiate_task = function(id, data, target, select_cols, learner_class) { task = TaskRegr$new(id = id, backend = data, target = target) } else if (learner_class == "LearnerClassif") { data[[target]] = factor(data[[target]]) - assert_set_equal(levels(data[[target]]), - c("0", "1")) + assert_set_equal( + levels(data[[target]]), + c("0", "1")) task = TaskClassif$new( id = id, backend = data, target = target, positive = "1") @@ -350,24 +351,31 @@ check_is_partition = function(ind, n_obs) { } } -check_smpl_split = function(smpl, n_obs, check_intersect=FALSE) { +check_smpl_split = function(smpl, n_obs, check_intersect = FALSE) { + assert_list(smpl, names = "named") - assert_set_equal(names(smpl), c('train_ids', 'test_ids')) + assert_set_equal(names(smpl), c("train_ids", "test_ids")) assert_list(smpl$train_ids, names = "unnamed") assert_list(smpl$test_ids, names = "unnamed") if (length(smpl$train_ids) != length(smpl$test_ids)) { stop("Number of folds for train and test samples do not match.") } - lapply(smpl$train_ids, function(train_ids) - assert_vector(train_ids, any.missing = FALSE, all.missing = FALSE, - unique = TRUE, max.len = n_obs)) - lapply(smpl$train_ids, function(train_ids) - assert_subset(train_ids, seq(n_obs))) - lapply(smpl$test_ids, function(test_ids) - assert_vector(test_ids, any.missing = FALSE, all.missing = FALSE, - unique = TRUE, max.len = n_obs)) - lapply(smpl$test_ids, function(test_ids) - assert_subset(test_ids, seq(n_obs))) + lapply(smpl$train_ids, function(train_ids) { + assert_vector(train_ids, + any.missing = FALSE, all.missing = FALSE, + unique = TRUE, max.len = n_obs) + }) + lapply(smpl$train_ids, function(train_ids) { + assert_subset(train_ids, seq(n_obs)) + }) + lapply(smpl$test_ids, function(test_ids) { + assert_vector(test_ids, + any.missing = FALSE, all.missing = FALSE, + unique = TRUE, max.len = n_obs) + }) + lapply(smpl$test_ids, function(test_ids) { + assert_subset(test_ids, seq(n_obs)) + }) if (check_intersect) { for (i_fold in seq(length(length(smpl$train_ids)))) { @@ -376,4 +384,3 @@ check_smpl_split = function(smpl, n_obs, check_intersect=FALSE) { } return(TRUE) } - diff --git a/tests/testthat/helper-01-helper_functions.R b/tests/testthat/helper-01-helper_functions.R index 4d535435..d63f8ce5 100644 --- a/tests/testthat/helper-01-helper_functions.R +++ b/tests/testthat/helper-01-helper_functions.R @@ -1,5 +1,5 @@ se_repeated = function(se_s, coefficients, theta_s) { - se = sqrt(stats::median(se_s^2 + (theta_s - coefficients)^2)) + se = sqrt(stats::median(se_s^2 + (theta_s - coefficients)^2)) return(se) } @@ -31,17 +31,17 @@ draw_bootstrap_weights = function(bootstrap, n_rep_boot, n_obs) { stop("invalid boot method") } weights = matrix(weights, nrow = n_rep_boot, ncol = n_obs, byrow = TRUE) - + return(weights) } functional_bootstrap = 
function(theta, se, psi, psi_a, k, smpls, - n_rep_boot, weights) { + n_rep_boot, weights) { score = psi J = mean(psi_a) - boot_coef = matrix(NA, nrow = 1, ncol = n_rep_boot) - boot_t_stat = matrix(NA, nrow = 1, ncol = n_rep_boot) + boot_coef = matrix(NA_real_, nrow = 1, ncol = n_rep_boot) + boot_t_stat = matrix(NA_real_, nrow = 1, ncol = n_rep_boot) for (i in seq(n_rep_boot)) { boot_coef[1, i] = mean(weights[i, ] * 1 / J * score) boot_t_stat[1, i] = boot_coef[1, i] / se diff --git a/tests/testthat/helper-03-dgp.R b/tests/testthat/helper-03-dgp.R index 4757dd92..53017363 100644 --- a/tests/testthat/helper-03-dgp.R +++ b/tests/testthat/helper-03-dgp.R @@ -133,44 +133,40 @@ dgp1_toeplitz = function(n, p, betamax = 4, decay = 0.99, threshold = 0, noiseva return(data) } -make_data_pliv_partialZ = function(n_obs, alpha = 1, dim_x = 5, dim_z = 150, - return_x_vars = FALSE) { +make_data_pliv_partialZ = function(n_obs, alpha = 1, dim_x = 5, dim_z = 150) { + sigma_e_u = matrix(c(1, 0.6, 0.6, 1), ncol = 2) mu_e_u = rep(0, 2) e_u = mvtnorm::rmvnorm(n = n_obs, mean = mu_e_u, sigma = sigma_e_u) epsilon = e_u[, 1] u = e_u[, 2] - + sigma_x = toeplitz(0.5^(0:(dim_x - 1))) mu_x = rep(0, dim_x) x = mvtnorm::rmvnorm(n = n_obs, mean = mu_x, sigma = sigma_x) - + I_z = diag(x = 1, ncol = dim_z, nrow = dim_z) mu_xi = rep(0, dim_z) xi = mvtnorm::rmvnorm(n = n_obs, mean = mu_xi, sigma = 0.25 * I_z) - + beta = 1 / (1:dim_x)^2 gamma = beta delta = 1 / (1:dim_z)^2 - + zeros = matrix(0, nrow = dim_x, ncol = (dim_z - dim_x)) I_x = diag(x = 1, ncol = dim_x, nrow = dim_x) Pi = cbind(I_x, zeros) - + z = x %*% Pi + xi d = x %*% gamma + z %*% delta + u y = alpha * d + x %*% beta + epsilon - + + colnames(x) = paste0("X", 1:dim_x) colnames(z) = paste0("Z", 1:dim_z) colnames(y) = "y" colnames(d) = "d" - - if (return_x_vars) { - data = data.frame(x, y, d, z) - } else { - data = data.frame(y, d, z) - } + + data = data.frame(x, y, d, z) return(data) } - diff --git a/tests/testthat/helper-04-simdata.R b/tests/testthat/helper-04-simdata.R index 727d63f3..a53449ae 100644 --- a/tests/testthat/helper-04-simdata.R +++ b/tests/testthat/helper-04-simdata.R @@ -9,7 +9,7 @@ if (on_cran) { setting_irm = list(theta = 0.5, n = 5000, p = 20) } -setting_pliv_partial = list(theta=1.0, n = 500) +setting_pliv_partial = list(theta = 1.0, n = 500) set.seed(1282) df = dgp1_plr( @@ -18,10 +18,11 @@ df = dgp1_plr( setting$p) Xnames = names(df)[names(df) %in% c("y", "d", "z") == FALSE] dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames) -data_plr = list(df = df, - dml_data = dml_data) + y_col = "y", + d_cols = "d", x_cols = Xnames) +data_plr = list( + df = df, + dml_data = dml_data) set.seed(1282) df = dgp1_iv( @@ -30,10 +31,11 @@ df = dgp1_iv( setting$p) Xnames = names(df)[names(df) %in% c("y", "d", "z") == FALSE] # note that Xnames includes z2 dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_cols = "z") -data_pliv = list(df = df, - dml_data = dml_data) + y_col = "y", + d_cols = "d", x_cols = Xnames, z_cols = "z") +data_pliv = list( + df = df, + dml_data = dml_data) set.seed(1282) df = dgp1_irm( @@ -42,10 +44,11 @@ df = dgp1_irm( setting_irm$p) Xnames = names(df)[names(df) %in% c("y", "d", "z") == FALSE] dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames) -data_irm = list(df = df, - dml_data = dml_data) + y_col = "y", + d_cols = "d", x_cols = Xnames) +data_irm = list( + df = df, + dml_data = dml_data) set.seed(1282) df = 
dgp1_irmiv( @@ -54,10 +57,11 @@ df = dgp1_irmiv( setting$p) Xnames = names(df)[names(df) %in% c("y", "d", "z") == FALSE] dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_col = "z") -data_iivm = list(df = df, - dml_data = dml_data) + y_col = "y", + d_cols = "d", x_cols = Xnames, z_col = "z") +data_iivm = list( + df = df, + dml_data = dml_data) set.seed(1282) data_plr_multi = dgp1_toeplitz( @@ -73,10 +77,11 @@ df = make_pliv_CHS2015( return_type = "data.frame") Xnames = names(df)[names(df) %in% c("y", "d", paste0("Z", 1:dim_z)) == FALSE] dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_cols = paste0("Z", 1:dim_z)) -data_pliv_partialXZ = list(df = df, - dml_data = dml_data) + y_col = "y", + d_cols = "d", x_cols = Xnames, z_cols = paste0("Z", 1:dim_z)) +data_pliv_partialXZ = list( + df = df, + dml_data = dml_data) set.seed(1282) dim_z = 5 @@ -87,10 +92,11 @@ df = make_pliv_CHS2015( return_type = "data.frame") Xnames = names(df)[names(df) %in% c("y", "d", paste0("Z", 1:dim_z)) == FALSE] dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_cols = paste0("Z", 1:dim_z)) -data_pliv_partialX = list(df = df, - dml_data = dml_data) + y_col = "y", + d_cols = "d", x_cols = Xnames, z_cols = paste0("Z", 1:dim_z)) +data_pliv_partialX = list( + df = df, + dml_data = dml_data) set.seed(1282) dim_z = 150 @@ -101,10 +107,11 @@ df = make_data_pliv_partialZ( return_x_vars = FALSE) Xnames = NULL dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_cols = paste0("Z", 1:dim_z)) -data_pliv_partialZ = list(df = df, - dml_data = dml_data) + y_col = "y", + d_cols = "d", x_cols = Xnames, z_cols = paste0("Z", 1:dim_z)) +data_pliv_partialZ = list( + df = df, + dml_data = dml_data) set.seed(1282) dim_z = 150 diff --git a/tests/testthat/helper-05-ml-learner.R b/tests/testthat/helper-05-ml-learner.R index d8244749..df1c7a69 100644 --- a/tests/testthat/helper-05-ml-learner.R +++ b/tests/testthat/helper-05-ml-learner.R @@ -71,8 +71,9 @@ get_default_mlmethod_plr = function(learner, default = FALSE) { ml_m = mlr3::lrn(mlmethod$mlmethod_m) ml_m$param_set$values = params$params_m - return(list(mlmethod = mlmethod, params = params, - ml_g = ml_g, ml_m = ml_m)) + return(list( + mlmethod = mlmethod, params = params, + ml_g = ml_g, ml_m = ml_m)) } get_default_mlmethod_pliv = function(learner) { @@ -153,8 +154,9 @@ get_default_mlmethod_pliv = function(learner) { ml_r = mlr3::lrn(mlmethod$mlmethod_r) ml_r$param_set$values = params$params_r - return(list(mlmethod = mlmethod, params = params, - ml_g = ml_g, ml_m = ml_m, ml_r = ml_r)) + return(list( + mlmethod = mlmethod, params = params, + ml_g = ml_g, ml_m = ml_m, ml_r = ml_r)) } get_default_mlmethod_irm = function(learner) { @@ -185,8 +187,9 @@ get_default_mlmethod_irm = function(learner) { ml_m = mlr3::lrn(mlmethod$mlmethod_m, predict_type = "prob") ml_m$param_set$values = params$params_m - return(list(mlmethod = mlmethod, params = params, - ml_g = ml_g, ml_m = ml_m)) + return(list( + mlmethod = mlmethod, params = params, + ml_g = ml_g, ml_m = ml_m)) } get_default_mlmethod_iivm = function(learner) { @@ -223,6 +226,7 @@ get_default_mlmethod_iivm = function(learner) { ml_r = mlr3::lrn(mlmethod$mlmethod_r, predict_type = "prob") ml_r$param_set$values = params$params_r - return(list(mlmethod = mlmethod, params = params, - ml_g = ml_g, ml_m = ml_m, ml_r = ml_r)) + return(list( + mlmethod = mlmethod, params = 
params, + ml_g = ml_g, ml_m = ml_m, ml_r = ml_r)) } diff --git a/tests/testthat/helper-08-dml_plr.R b/tests/testthat/helper-08-dml_plr.R index 2eeaaf2b..f0861545 100644 --- a/tests/testthat/helper-08-dml_plr.R +++ b/tests/testthat/helper-08-dml_plr.R @@ -1,29 +1,30 @@ # Double Machine Learning for Partially Linear Regression. dml_plr = function(data, y, d, - n_folds, ml_g, ml_m, - dml_procedure, score, - n_rep = 1, smpls = NULL, - params_g = NULL, params_m = NULL) { + n_folds, ml_g, ml_m, + dml_procedure, score, + n_rep = 1, smpls = NULL, + params_g = NULL, params_m = NULL) { if (is.null(smpls)) { smpls = lapply(1:n_rep, function(x) sample_splitting(n_folds, data)) } - all_thetas = all_ses = rep(NA, n_rep) + all_thetas = all_ses = rep(NA_real_, n_rep) all_preds = list() for (i_rep in 1:n_rep) { this_smpl = smpls[[i_rep]] stopifnot(length(this_smpl$train_ids) == length(this_smpl$test_ids)) if (length(this_smpl$train_ids) == 1) { - dml_procedure = 'dml1' + dml_procedure = "dml1" } - - res_single_split = fit_plr_single_split(data, y, d, - n_folds, ml_g, ml_m, - dml_procedure, score, - this_smpl, - params_g, params_m) + + res_single_split = fit_plr_single_split( + data, y, d, + n_folds, ml_g, ml_m, + dml_procedure, score, + this_smpl, + params_g, params_m) all_preds[[i_rep]] = res_single_split$all_preds all_thetas[i_rep] = res_single_split$theta @@ -36,39 +37,39 @@ dml_plr = function(data, y, d, } else { n = length(this_smpl$test_ids[[1]]) } - se = se_repeated(all_ses*sqrt(n), all_thetas, theta)/sqrt(n) + se = se_repeated(all_ses * sqrt(n), all_thetas, theta) / sqrt(n) t = theta / se pval = 2 * stats::pnorm(-abs(t)) - names(theta) = names(se) = names(t) = names(pval) =d + names(theta) = names(se) = names(t) = names(pval) = d res = list( coef = theta, se = se, t = t, pval = pval, thetas = all_thetas, ses = all_ses, - all_preds = all_preds, smpls=smpls) + all_preds = all_preds, smpls = smpls) return(res) } dml_plr_multitreat = function(data, y, d, - n_folds, ml_g, ml_m, - dml_procedure, score, - n_rep = 1, smpls = NULL, - params_g = NULL, params_m = NULL) { - + n_folds, ml_g, ml_m, + dml_procedure, score, + n_rep = 1, smpls = NULL, + params_g = NULL, params_m = NULL) { + if (is.null(smpls)) { smpls = lapply(1:n_rep, function(x) sample_splitting(n_folds, data)) } - + all_preds = all_thetas = all_ses = list() n_d = length(d) - + for (i_rep in 1:n_rep) { this_smpl = smpls[[i_rep]] - thetas_this_rep = ses_this_rep = rep(NA, n_d) + thetas_this_rep = ses_this_rep = rep(NA_real_, n_d) all_preds_this_rep = list() - + for (i_d in seq(n_d)) { if (!is.null(params_g)) { this_params_g = params_g[[i_d]] @@ -80,24 +81,25 @@ dml_plr_multitreat = function(data, y, d, } else { this_params_m = NULL } - res_single_split = fit_plr_single_split(data, y, d[i_d], - n_folds, ml_g, ml_m, - dml_procedure, score, - this_smpl, - this_params_g, this_params_m) - + res_single_split = fit_plr_single_split( + data, y, d[i_d], + n_folds, ml_g, ml_m, + dml_procedure, score, + this_smpl, + this_params_g, this_params_m) + all_preds_this_rep[[i_d]] = res_single_split$all_preds thetas_this_rep[i_d] = res_single_split$theta ses_this_rep[i_d] = res_single_split$se } - + all_preds[[i_rep]] = all_preds_this_rep all_thetas[[i_rep]] = thetas_this_rep all_ses[[i_rep]] = ses_this_rep - + } - - theta = se = t = pval = rep(NA, n_d) + + theta = se = t = pval = rep(NA_real_, n_d) if (length(this_smpl$train_ids) > 1) { n = nrow(data) } else { @@ -107,48 +109,50 @@ dml_plr_multitreat = function(data, y, d, theta_vec = unlist(lapply(all_thetas, 
function(x) x[i_d])) se_vec = unlist(lapply(all_ses, function(x) x[i_d])) theta[i_d] = stats::median(theta_vec) - se[i_d] = se_repeated(se_vec*sqrt(n), theta_vec, theta[i_d])/sqrt(n) + se[i_d] = se_repeated(se_vec * sqrt(n), theta_vec, theta[i_d]) / sqrt(n) t[i_d] = theta[i_d] / se[i_d] pval[i_d] = 2 * stats::pnorm(-abs(t[i_d])) } - + names(theta) = names(se) = names(t) = names(pval) = d res = list( coef = theta, se = se, t = t, pval = pval, thetas = all_thetas, ses = all_ses, - all_preds = all_preds, smpls=smpls) - + all_preds = all_preds, smpls = smpls) + return(res) } fit_plr_single_split = function(data, y, d, - n_folds, ml_g, ml_m, - dml_procedure, score, smpl, - params_g, params_m) { - + n_folds, ml_g, ml_m, + dml_procedure, score, smpl, + params_g, params_m) { + train_ids = smpl$train_ids test_ids = smpl$test_ids - - all_preds = fit_nuisance_plr(data, y, d, - ml_g, ml_m, - smpl, - params_g, params_m) - - residuals = compute_plr_residuals(data, y, d, n_folds, smpl, - all_preds) + + all_preds = fit_nuisance_plr( + data, y, d, + ml_g, ml_m, + smpl, + params_g, params_m) + + residuals = compute_plr_residuals( + data, y, d, n_folds, smpl, + all_preds) u_hat = residuals$u_hat v_hat = residuals$v_hat D = data[, d] Y = data[, y] - v_hatd = v_hat * D - + v_hatd = v_hat * D + # DML 1 if (dml_procedure == "dml1") { - thetas = rep(NA, n_folds) + thetas = rep(NA_real_, n_folds) for (i in 1:n_folds) { test_index = test_ids[[i]] - + orth_est = orth_plr_dml( u_hat = u_hat[test_index], v_hat = v_hat[test_index], v_hatd = v_hatd[test_index], @@ -163,112 +167,116 @@ fit_plr_single_split = function(data, y, d, v_hatd = v_hatd[test_index] } } - + if (dml_procedure == "dml2") { - orth_est = orth_plr_dml(u_hat = u_hat, v_hat = v_hat, - v_hatd = v_hatd, score = score) + orth_est = orth_plr_dml( + u_hat = u_hat, v_hat = v_hat, + v_hatd = v_hatd, score = score) theta = orth_est$theta } - + se = sqrt(var_plr( theta = theta, d = D, u_hat = u_hat, v_hat = v_hat, v_hatd = v_hatd, score = score)) - + res = list( theta = theta, se = se, all_preds = all_preds) - + return(res) } fit_nuisance_plr = function(data, y, d, - ml_g, ml_m, - smpls, - params_g, params_m) { + ml_g, ml_m, + smpls, + params_g, params_m) { + train_ids = smpls$train_ids test_ids = smpls$test_ids - + # nuisance g g_indx = names(data) != d data_g = data[, g_indx, drop = FALSE] task_g = mlr3::TaskRegr$new(id = paste0("nuis_g_", d), backend = data_g, target = y) - + resampling_g = mlr3::rsmp("custom") resampling_g$instantiate(task_g, train_ids, test_ids) - + if (!is.null(params_g)) { ml_g$param_set$values = params_g } - + r_g = mlr3::resample(task_g, ml_g, resampling_g, store_models = TRUE) g_hat_list = lapply(r_g$predictions(), function(x) x$response) - + # nuisance m if (!is.null(params_m)) { ml_m$param_set$values = params_m } m_indx = names(data) != y data_m = data[, m_indx, drop = FALSE] - + if (checkmate::test_class(ml_m, "LearnerRegr")) { task_m = mlr3::TaskRegr$new(id = paste0("nuis_m_", d), backend = data_m, target = d) - + resampling_m = mlr3::rsmp("custom") resampling_m$instantiate(task_m, train_ids, test_ids) - + r_m = mlr3::resample(task_m, ml_m, resampling_m, store_models = TRUE) m_hat_list = lapply(r_m$predictions(), function(x) x$response) } else if (checkmate::test_class(ml_m, "LearnerClassif")) { ml_m$predict_type = "prob" data_m[[d]] = factor(data_m[[d]]) - task_m = mlr3::TaskClassif$new(id = paste0("nuis_m_", d), backend = data_m, - target = d, positive = "1") - + task_m = mlr3::TaskClassif$new( + id = paste0("nuis_m_", d), backend 
= data_m, + target = d, positive = "1") + resampling_m = mlr3::rsmp("custom") resampling_m$instantiate(task_m, train_ids, test_ids) - + r_m = mlr3::resample(task_m, ml_m, resampling_m, store_models = TRUE) m_hat_list = lapply(r_m$predictions(), function(x) as.data.table(x)$prob.1) } - + all_preds = list( m_hat_list = m_hat_list, g_hat_list = g_hat_list) - + return(all_preds) } compute_plr_residuals = function(data, y, d, n_folds, smpls, all_preds) { + test_ids = smpls$test_ids - + g_hat_list = all_preds$g_hat_list m_hat_list = all_preds$m_hat_list - + n = nrow(data) D = data[, d] Y = data[, y] - - v_hat = u_hat = w_hat = rep(NA, n) - + + v_hat = u_hat = w_hat = rep(NA_real_, n) + for (i in 1:n_folds) { test_index = test_ids[[i]] - + g_hat = g_hat_list[[i]] m_hat = m_hat_list[[i]] - + u_hat[test_index] = Y[test_index] - g_hat v_hat[test_index] = D[test_index] - m_hat } - residuals = list(u_hat=u_hat, v_hat=v_hat) - + residuals = list(u_hat = u_hat, v_hat = v_hat) + return(residuals) } # Orthogonalized Estimation of Coefficient in PLR orth_plr_dml = function(u_hat, v_hat, v_hatd, score) { - theta = NA + theta = NA_real_ if (score == "partialling out") { res_fit = stats::lm(u_hat ~ 0 + v_hat) @@ -306,17 +314,18 @@ var_plr = function(theta, d, u_hat, v_hat, v_hatd, score) { # Bootstrap Implementation for Partially Linear Regression Model bootstrap_plr = function(thetas, ses, data, y, d, - n_folds, smpls, all_preds, - bootstrap, n_rep_boot, score, - n_rep=1) { + n_folds, smpls, all_preds, + bootstrap, n_rep_boot, score, + n_rep = 1) { for (i_rep in 1:n_rep) { n = nrow(data) weights = draw_bootstrap_weights(bootstrap, n_rep_boot, n) - this_res = boot_plr_single_split(thetas[i_rep], ses[i_rep], - data, y, d, n_folds, smpls[[i_rep]], - all_preds[[i_rep]], - weights, n_rep_boot, score) - if (i_rep==1) { + this_res = boot_plr_single_split( + thetas[i_rep], ses[i_rep], + data, y, d, n_folds, smpls[[i_rep]], + all_preds[[i_rep]], + weights, n_rep_boot, score) + if (i_rep == 1) { boot_res = this_res } else { boot_res$boot_coef = cbind(boot_res$boot_coef, this_res$boot_coef) @@ -328,25 +337,27 @@ bootstrap_plr = function(thetas, ses, data, y, d, boot_plr_multitreat = function(thetas, ses, data, y, d, - n_folds, smpls, all_preds, - bootstrap, n_rep_boot, score, - n_rep=1) { + n_folds, smpls, all_preds, + bootstrap, n_rep_boot, score, + n_rep = 1) { n_d = length(d) for (i_rep in 1:n_rep) { n = nrow(data) weights = draw_bootstrap_weights(bootstrap, n_rep_boot, n) - boot_theta = boot_t_stat = matrix(NA, nrow = n_d, ncol = n_rep_boot) + boot_theta = boot_t_stat = matrix(NA_real_, nrow = n_d, ncol = n_rep_boot) for (i_d in seq(n_d)) { - this_res = boot_plr_single_split(thetas[[i_rep]][i_d], ses[[i_rep]][i_d], - data, y, d[i_d], n_folds, smpls[[i_rep]], - all_preds[[i_rep]][[i_d]], - weights, n_rep_boot, score) + this_res = boot_plr_single_split( + thetas[[i_rep]][i_d], ses[[i_rep]][i_d], + data, y, d[i_d], n_folds, smpls[[i_rep]], + all_preds[[i_rep]][[i_d]], + weights, n_rep_boot, score) boot_theta[i_d, ] = this_res$boot_coef boot_t_stat[i_d, ] = this_res$boot_t_stat } - this_res = list(boot_coef=boot_theta, - boot_t_stat=boot_t_stat) - if (i_rep==1) { + this_res = list( + boot_coef = boot_theta, + boot_t_stat = boot_t_stat) + if (i_rep == 1) { boot_res = this_res } else { boot_res$boot_coef = cbind(boot_res$boot_coef, this_res$boot_coef) @@ -358,15 +369,17 @@ boot_plr_multitreat = function(thetas, ses, data, y, d, boot_plr_single_split = function(theta, se, data, y, d, - n_folds, smpl, all_preds, - weights, 
n_rep_boot, score) { - residuals = compute_plr_residuals(data, y, d, n_folds, - smpl, all_preds) + n_folds, smpl, all_preds, + weights, n_rep_boot, score) { + + residuals = compute_plr_residuals( + data, y, d, n_folds, + smpl, all_preds) u_hat = residuals$u_hat v_hat = residuals$v_hat D = data[, d] - v_hatd = v_hat * D - + v_hatd = v_hat * D + if (score == "partialling out") { psi = (u_hat - v_hat * theta) * v_hat psi_a = -v_hat * v_hat @@ -375,10 +388,11 @@ boot_plr_single_split = function(theta, se, data, y, d, psi = (u_hat - D * theta) * v_hat psi_a = -v_hatd } - - res = functional_bootstrap(theta, se, - psi, psi_a, n_folds, - smpl, - n_rep_boot, weights) + + res = functional_bootstrap( + theta, se, + psi, psi_a, n_folds, + smpl, + n_rep_boot, weights) return(res) } diff --git a/tests/testthat/helper-09-dml_pliv.R b/tests/testthat/helper-09-dml_pliv.R index 6556eb06..afe10667 100644 --- a/tests/testthat/helper-09-dml_pliv.R +++ b/tests/testthat/helper-09-dml_pliv.R @@ -1,36 +1,39 @@ # Double Machine Learning for Partially Linear Instrumental Variable Regression. dml_pliv = function(data, y, d, z, - n_folds, - ml_g, ml_m, ml_r, - params, dml_procedure, score, - n_rep = 1, smpls=NULL, - params_g = NULL, params_m = NULL, params_r = NULL) { + n_folds, + ml_g, ml_m, ml_r, + params, dml_procedure, score, + n_rep = 1, smpls = NULL, + params_g = NULL, params_m = NULL, params_r = NULL) { + if (is.null(smpls)) { smpls = lapply(1:n_rep, function(x) sample_splitting(n_folds, data)) } - - all_thetas = all_ses = rep(NA, n_rep) + + all_thetas = all_ses = rep(NA_real_, n_rep) all_preds = list() - + for (i_rep in 1:n_rep) { this_smpl = smpls[[i_rep]] train_ids = this_smpl$train_ids test_ids = this_smpl$test_ids - - all_preds[[i_rep]] = fit_nuisance_pliv(data, y, d, z, - ml_g, ml_m, ml_r, - this_smpl, - params_g, params_m, params_r) - - residuals = compute_pliv_residuals(data, y, d, z, n_folds, this_smpl, - all_preds[[i_rep]]) + + all_preds[[i_rep]] = fit_nuisance_pliv( + data, y, d, z, + ml_g, ml_m, ml_r, + this_smpl, + params_g, params_m, params_r) + + residuals = compute_pliv_residuals( + data, y, d, z, n_folds, this_smpl, + all_preds[[i_rep]]) u_hat = residuals$u_hat v_hat = residuals$v_hat w_hat = residuals$w_hat - + # DML 1 if (dml_procedure == "dml1") { - thetas = vars = rep(NA, n_folds) + thetas = vars = rep(NA_real_, n_folds) for (i in 1:n_folds) { test_index = test_ids[[i]] orth_est = orth_pliv_dml( @@ -53,19 +56,19 @@ dml_pliv = function(data, y, d, z, score = score) all_thetas[i_rep] = orth_est$theta } - + all_ses[i_rep] = sqrt(var_pliv( theta = all_thetas[i_rep], u_hat = u_hat, v_hat = v_hat, w_hat = w_hat, score = score)) } - + theta = stats::median(all_thetas) if (length(this_smpl$train_ids) > 1) { n = nrow(data) } else { n = length(this_smpl$test_ids[[1]]) } - se = se_repeated(all_ses*sqrt(n), all_thetas, theta)/sqrt(n) + se = se_repeated(all_ses * sqrt(n), all_thetas, theta) / sqrt(n) t = theta / se pval = 2 * stats::pnorm(-abs(t)) @@ -74,15 +77,16 @@ dml_pliv = function(data, y, d, z, res = list( coef = theta, se = se, t = t, pval = pval, thetas = all_thetas, ses = all_ses, - all_preds=all_preds, smpls=smpls) + all_preds = all_preds, smpls = smpls) return(res) } fit_nuisance_pliv = function(data, y, d, z, - ml_g, ml_m, ml_r, - smpls, - params_g, params_m, params_r) { + ml_g, ml_m, ml_r, + smpls, + params_g, params_m, params_r) { + train_ids = smpls$train_ids test_ids = smpls$test_ids @@ -90,17 +94,17 @@ fit_nuisance_pliv = function(data, y, d, z, g_indx = names(data) != d & names(data) 
!= z data_g = data[, g_indx, drop = FALSE] task_g = mlr3::TaskRegr$new(id = paste0("nuis_g_", d), backend = data_g, target = y) - + resampling_g = mlr3::rsmp("custom") resampling_g$instantiate(task_g, train_ids, test_ids) - + if (!is.null(params_g)) { ml_g$param_set$values = params_g } - + r_g = mlr3::resample(task_g, ml_g, resampling_g, store_models = TRUE) g_hat_list = lapply(r_g$predictions(), function(x) x$response) - + # nuisance m: E[Z|X] m_indx = names(data) != y & names(data) != d data_m = data[, m_indx, drop = FALSE] @@ -108,10 +112,10 @@ fit_nuisance_pliv = function(data, y, d, z, if (!is.null(params_m)) { ml_m$param_set$values = params_m } - + resampling_m = mlr3::rsmp("custom") resampling_m$instantiate(task_m, train_ids, test_ids) - + r_m = mlr3::resample(task_m, ml_m, resampling_m, store_models = TRUE) m_hat_list = lapply(r_m$predictions(), function(x) x$response) @@ -122,13 +126,13 @@ fit_nuisance_pliv = function(data, y, d, z, if (!is.null(params_r)) { ml_r$param_set$values = params_r } - + resampling_r = mlr3::rsmp("custom") resampling_r$instantiate(task_r, train_ids, test_ids) - + r_r = mlr3::resample(task_r, ml_r, resampling_r, store_models = TRUE) r_hat_list = lapply(r_r$predictions(), function(x) x$response) - + all_preds = list( m_hat_list = m_hat_list, g_hat_list = g_hat_list, @@ -138,31 +142,32 @@ fit_nuisance_pliv = function(data, y, d, z, } compute_pliv_residuals = function(data, y, d, z, n_folds, smpls, all_preds) { + test_ids = smpls$test_ids m_hat_list = all_preds$m_hat_list g_hat_list = all_preds$g_hat_list r_hat_list = all_preds$r_hat_list - + n = nrow(data) D = data[, d] Y = data[, y] Z = data[, z] - - v_hat = u_hat = w_hat = rep(NA, n) - + + v_hat = u_hat = w_hat = rep(NA_real_, n) + for (i in 1:n_folds) { test_index = test_ids[[i]] - + m_hat = m_hat_list[[i]] g_hat = g_hat_list[[i]] r_hat = r_hat_list[[i]] - + v_hat[test_index] = D[test_index] - r_hat u_hat[test_index] = Y[test_index] - g_hat w_hat[test_index] = Z[test_index] - m_hat } - residuals = list(u_hat=u_hat, v_hat=v_hat, w_hat=w_hat) + residuals = list(u_hat = u_hat, v_hat = v_hat, w_hat = w_hat) return(residuals) } @@ -191,24 +196,26 @@ var_pliv = function(theta, u_hat, v_hat, w_hat, score) { # Bootstrap Implementation for Partially Linear Regression Model bootstrap_pliv = function(theta, se, data, y, d, z, n_folds, smpls, - all_preds, bootstrap, n_rep_boot, - n_rep=1) { + all_preds, bootstrap, n_rep_boot, + n_rep = 1) { for (i_rep in 1:n_rep) { - residuals = compute_pliv_residuals(data, y, d, z, n_folds, - smpls[[i_rep]], all_preds[[i_rep]]) + residuals = compute_pliv_residuals( + data, y, d, z, n_folds, + smpls[[i_rep]], all_preds[[i_rep]]) u_hat = residuals$u_hat v_hat = residuals$v_hat w_hat = residuals$w_hat - + psi = (u_hat - v_hat * theta[i_rep]) * w_hat - psi_a = - v_hat * w_hat - + psi_a = -v_hat * w_hat + n = length(psi) weights = draw_bootstrap_weights(bootstrap, n_rep_boot, n) - this_res = functional_bootstrap(theta[i_rep], se[i_rep], psi, psi_a, n_folds, - smpls[[i_rep]], - n_rep_boot, weights) - if (i_rep==1) { + this_res = functional_bootstrap( + theta[i_rep], se[i_rep], psi, psi_a, n_folds, + smpls[[i_rep]], + n_rep_boot, weights) + if (i_rep == 1) { boot_res = this_res } else { boot_res$boot_coef = cbind(boot_res$boot_coef, this_res$boot_coef) diff --git a/tests/testthat/helper-10-dml_irm.R b/tests/testthat/helper-10-dml_irm.R index 868460b9..f2936368 100644 --- a/tests/testthat/helper-10-dml_irm.R +++ b/tests/testthat/helper-10-dml_irm.R @@ -1,30 +1,31 @@ # Double Machine 
Learning for Interactive Regression Model. dml_irm = function(data, y, d, - n_folds, ml_g, ml_m, - dml_procedure, score, - n_rep = 1, smpls = NULL, - trimming_threshold = 1e-12, - params_g = NULL, params_m = NULL) { - + n_folds, ml_g, ml_m, + dml_procedure, score, + n_rep = 1, smpls = NULL, + trimming_threshold = 1e-12, + params_g = NULL, params_m = NULL) { + if (is.null(smpls)) { smpls = lapply(1:n_rep, function(x) sample_splitting(n_folds, data)) } - - all_thetas = all_ses = rep(NA, n_rep) + + all_thetas = all_ses = rep(NA_real_, n_rep) all_preds = list() - + for (i_rep in 1:n_rep) { this_smpl = smpls[[i_rep]] train_ids = this_smpl$train_ids test_ids = this_smpl$test_ids - - all_preds[[i_rep]] = fit_nuisance_irm(data, y, d, - ml_g, ml_m, - train_ids, test_ids, score, - params_g, params_m) + + all_preds[[i_rep]] = fit_nuisance_irm( + data, y, d, + ml_g, ml_m, + train_ids, test_ids, score, + params_g, params_m) res = extract_irm_residuals(data, y, d, n_folds, this_smpl, - all_preds[[i_rep]], score, - trimming_threshold=trimming_threshold) + all_preds[[i_rep]], score, + trimming_threshold = trimming_threshold) u0_hat = res$u0_hat u1_hat = res$u1_hat m_hat = res$m_hat @@ -33,11 +34,11 @@ dml_irm = function(data, y, d, g1_hat = res$g1_hat D = data[, d] Y = data[, y] - + # DML 1 if (dml_procedure == "dml1") { - thetas = vars = rep(NA, n_folds) - + thetas = vars = rep(NA_real_, n_folds) + for (i in 1:n_folds) { test_index = test_ids[[i]] orth_est = orth_irm_dml( @@ -67,20 +68,20 @@ dml_irm = function(data, y, d, y = Y, m = m_hat, score = score) all_thetas[i_rep] = orth_est$theta } - + all_ses[i_rep] = sqrt(var_irm( theta = all_thetas[i_rep], g0_hat = g0_hat, g1_hat = g1_hat, u0_hat = u0_hat, u1_hat = u1_hat, d = D, p_hat = p_hat, m = m_hat, y = Y, score = score)) } - + theta = stats::median(all_thetas) if (length(this_smpl$train_ids) > 1) { n = nrow(data) } else { n = length(this_smpl$test_ids[[1]]) } - se = se_repeated(all_ses*sqrt(n), all_thetas, theta)/sqrt(n) + se = se_repeated(all_ses * sqrt(n), all_thetas, theta) / sqrt(n) t = theta / se pval = 2 * stats::pnorm(-abs(t)) @@ -89,30 +90,31 @@ dml_irm = function(data, y, d, res = list( coef = theta, se = se, t = t, pval = pval, thetas = all_thetas, ses = all_ses, - all_preds = all_preds, smpls=smpls) + all_preds = all_preds, smpls = smpls) return(res) } fit_nuisance_irm = function(data, y, d, - ml_g, ml_m, - train_ids, test_ids, score, - params_g, params_m) { + ml_g, ml_m, + train_ids, test_ids, score, + params_g, params_m) { # Set up task_m first to get resampling (test and train ids) scheme based on full sample # nuisance m + m_indx = names(data) != y data_m = data[, m_indx, drop = FALSE] - + # tbd: handle case with classif vs. regr. 
for task_p data_m[, d] = factor(data_m[, d]) task_m = mlr3::TaskClassif$new( id = paste0("nuis_p_", d), backend = data_m, target = d, positive = "1") - + resampling_m = mlr3::rsmp("custom") resampling_m$instantiate(task_m, train_ids, test_ids) n_iters = resampling_m$iters - + # train and test ids according to status of d # in each fold, select those with d = 0 train_ids_0 = lapply(1:n_iters, function(x) { @@ -142,10 +144,10 @@ fit_nuisance_irm = function(data, y, d, resampling_g0$instantiate(task_g0, train_ids_0, test_ids) train_ids_g0 = lapply(1:n_iters, function(x) resampling_g0$train_set(x)) test_ids_g0 = lapply(1:n_iters, function(x) resampling_g0$test_set(x)) - + r_g0 = mlr3::resample(task_g0, ml_g0, resampling_g0, store_models = TRUE) g0_hat_list = lapply(r_g0$predictions(), function(x) x$response) - + # nuisance g1: E[Y|D=1, X] if (score == "ATE") { task_g1 = mlr3::TaskRegr$new(id = paste0("nuis_g1_", d), backend = data_g, target = y) @@ -157,7 +159,7 @@ fit_nuisance_irm = function(data, y, d, resampling_g1$instantiate(task_g1, train_ids_1, test_ids) train_ids_g1 = lapply(1:n_iters, function(x) resampling_g1$train_set(x)) test_ids_g1 = lapply(1:n_iters, function(x) resampling_g1$test_set(x)) - + r_g1 = mlr3::resample(task_g1, ml_g1, resampling_g1, store_models = TRUE) g1_hat_list = lapply(r_g1$predictions(), function(x) x$response) } else { @@ -173,49 +175,50 @@ fit_nuisance_irm = function(data, y, d, } extract_irm_residuals = function(data, y, d, n_folds, smpls, all_preds, score, - trimming_threshold) { + trimming_threshold) { + test_ids = smpls$test_ids - + m_hat_list = all_preds$m_hat_list g0_hat_list = all_preds$g0_hat_list g1_hat_list = all_preds$g1_hat_list - + n = nrow(data) D = data[, d] Y = data[, y] - - g0_hat = g1_hat = u0_hat = u1_hat = m_hat = p_hat = rep(NA, n) - + + g0_hat = g1_hat = u0_hat = u1_hat = m_hat = p_hat = rep(NA_real_, n) + for (i in 1:n_folds) { test_index = test_ids[[i]] - + m_hat[test_index] = m_hat_list[[i]] p_hat[test_index] = mean(D[test_index]) g0_hat[test_index] = g0_hat_list[[i]] - + if (score == "ATE") { g1_hat[test_index] = g1_hat_list[[i]] } - + u0_hat[test_index] = Y[test_index] - g0_hat[test_index] - + if (score == "ATE") { u1_hat[test_index] = Y[test_index] - g1_hat[test_index] } } - + m_hat = trim_vec(m_hat, trimming_threshold) - - res = list(u0_hat=u0_hat, u1_hat=u1_hat, - m_hat=m_hat, p_hat=p_hat, - g0_hat=g0_hat, g1_hat=g1_hat) - + + res = list( + u0_hat = u0_hat, u1_hat = u1_hat, + m_hat = m_hat, p_hat = p_hat, + g0_hat = g0_hat, g1_hat = g1_hat) + return(res) } # Orthogonalized Estimation of Coefficient in irm orth_irm_dml = function(g0_hat, g1_hat, u0_hat, u1_hat, d, p_hat, m, y, score) { - if (score == "ATE") { theta = mean(g1_hat - g0_hat + d * (u1_hat) / m - (1 - d) * u0_hat / (1 - m)) } @@ -238,7 +241,7 @@ var_irm = function(theta, g0_hat, g1_hat, u0_hat, u1_hat, d, p_hat, m, y, score) var = 1 / n * mean(((g1_hat - g0_hat + d * (u1_hat) / m - (1 - d) * u0_hat / (1 - m) - theta)^2)) } else if (score == "ATTE") { - var = 1 / n * mean((d * (y - g0_hat) / p_hat - m * (1 - d) * u0_hat / (p_hat * (1 - m)) - d / p_hat * theta)^2) / (mean(d/p_hat)^2) + var = 1 / n * mean((d * (y - g0_hat) / p_hat - m * (1 - d) * u0_hat / (p_hat * (1 - m)) - d / p_hat * theta)^2) / (mean(d / p_hat)^2) } return(c(var)) @@ -246,12 +249,12 @@ var_irm = function(theta, g0_hat, g1_hat, u0_hat, u1_hat, d, p_hat, m, y, score) # Bootstrap Implementation for Interactive Regression Model bootstrap_irm = function(theta, se, data, y, d, n_folds, smpls, all_preds, - 
score, bootstrap, n_rep_boot, - n_rep=1, trimming_threshold = 1e-12) { + score, bootstrap, n_rep_boot, + n_rep = 1, trimming_threshold = 1e-12) { for (i_rep in 1:n_rep) { res = extract_irm_residuals(data, y, d, n_folds, - smpls[[i_rep]], all_preds[[i_rep]], score, - trimming_threshold=trimming_threshold) + smpls[[i_rep]], all_preds[[i_rep]], score, + trimming_threshold = trimming_threshold) u0_hat = res$u0_hat u1_hat = res$u1_hat m_hat = res$m_hat @@ -259,7 +262,7 @@ bootstrap_irm = function(theta, se, data, y, d, n_folds, smpls, all_preds, g0_hat = res$g0_hat g1_hat = res$g1_hat D = data[, d] - + if (score == "ATE") { psi = g1_hat - g0_hat + D * u1_hat / m_hat - (1 - D) * u0_hat / (1 - m_hat) - theta[i_rep] psi_a = rep(-1, length(D)) @@ -268,13 +271,14 @@ bootstrap_irm = function(theta, se, data, y, d, n_folds, smpls, all_preds, psi = D * u0_hat / p_hat - m_hat * (1 - D) * u0_hat / (p_hat * (1 - m_hat)) - D / p_hat * theta[i_rep] psi_a = -D / p_hat } - + n = length(psi) weights = draw_bootstrap_weights(bootstrap, n_rep_boot, n) - this_res = functional_bootstrap(theta[i_rep], se[i_rep], psi, psi_a, n_folds, - smpls[[i_rep]], - n_rep_boot, weights) - if (i_rep==1) { + this_res = functional_bootstrap( + theta[i_rep], se[i_rep], psi, psi_a, n_folds, + smpls[[i_rep]], + n_rep_boot, weights) + if (i_rep == 1) { boot_res = this_res } else { boot_res$boot_coef = cbind(boot_res$boot_coef, this_res$boot_coef) diff --git a/tests/testthat/helper-11-dml_iivm.R b/tests/testthat/helper-11-dml_iivm.R index 1ccbeb84..c3b3a69e 100644 --- a/tests/testthat/helper-11-dml_iivm.R +++ b/tests/testthat/helper-11-dml_iivm.R @@ -1,33 +1,34 @@ # Double Machine Learning for Interactive Instrumental Variable Regression Model. dml_irmiv = function(data, y, d, z, - n_folds, - ml_g, ml_m, ml_r, - dml_procedure, score, - always_takers = TRUE, never_takers = TRUE, - n_rep = 1, smpls = NULL, - trimming_threshold = 1e-12, - params_g = NULL, params_m = NULL, params_r = NULL) { - + n_folds, + ml_g, ml_m, ml_r, + dml_procedure, score, + always_takers = TRUE, never_takers = TRUE, + n_rep = 1, smpls = NULL, + trimming_threshold = 1e-12, + params_g = NULL, params_m = NULL, params_r = NULL) { + if (is.null(smpls)) { smpls = lapply(1:n_rep, function(x) sample_splitting(n_folds, data)) } - - all_thetas = all_ses = rep(NA, n_rep) + + all_thetas = all_ses = rep(NA_real_, n_rep) all_preds = list() - + for (i_rep in 1:n_rep) { this_smpl = smpls[[i_rep]] train_ids = this_smpl$train_ids test_ids = this_smpl$test_ids - - all_preds[[i_rep]] = fit_nuisance_iivm(data, y, d, z, - ml_g, ml_m, ml_r, - train_ids, test_ids, - always_takers, never_takers, - params_g, params_m, params_r) + + all_preds[[i_rep]] = fit_nuisance_iivm( + data, y, d, z, + ml_g, ml_m, ml_r, + train_ids, test_ids, + always_takers, never_takers, + params_g, params_m, params_r) res = extract_iivm_preds(data, y, d, z, n_folds, - this_smpl, all_preds[[i_rep]], - trimming_threshold=trimming_threshold) + this_smpl, all_preds[[i_rep]], + trimming_threshold = trimming_threshold) m_hat = res$m_hat g0_hat = res$g0_hat g1_hat = res$g1_hat @@ -36,10 +37,10 @@ dml_irmiv = function(data, y, d, z, D = data[, d] Y = data[, y] Z = data[, z] - + # DML 1 if (dml_procedure == "dml1") { - thetas = vars = rep(NA, n_folds) + thetas = vars = rep(NA_real_, n_folds) for (i in 1:n_folds) { test_index = test_ids[[i]] orth_est = orth_irmiv_dml( @@ -71,21 +72,21 @@ dml_irmiv = function(data, y, d, z, score = score) all_thetas[i_rep] = orth_est$theta } - + all_ses[i_rep] = sqrt(var_irmiv( theta = 
all_thetas[i_rep], m_hat = m_hat, g0_hat = g0_hat, g1_hat = g1_hat, r0_hat = r0_hat, r1_hat = r1_hat, d = D, y = Y, z = Z, score = score)) } - + theta = stats::median(all_thetas) if (length(this_smpl$train_ids) > 1) { n = nrow(data) } else { n = length(this_smpl$test_ids[[1]]) } - se = se_repeated(all_ses*sqrt(n), all_thetas, theta)/sqrt(n) - + se = se_repeated(all_ses * sqrt(n), all_thetas, theta) / sqrt(n) + t = theta / se pval = 2 * stats::pnorm(-abs(t)) @@ -100,16 +101,17 @@ dml_irmiv = function(data, y, d, z, fit_nuisance_iivm = function(data, y, d, z, - ml_g, ml_m, ml_r, - train_ids, test_ids, - always_takers, never_takers, - params_g, params_m, params_r) { + ml_g, ml_m, ml_r, + train_ids, test_ids, + always_takers, never_takers, + params_g, params_m, params_r) { # Set up task_m first to get resampling (test and train ids) scheme based on full sample # nuisance m + m_indx = names(data) != y & names(data) != d data_m = data[, m_indx, drop = FALSE] - + # tbd: handle case with classif vs. regr. for task_m # if (grepl("regr.", mlmethod$mlmethod_p )) { # # task_m = mlr3::TaskRegr$new(id = paste0("nuis_m_", z), backend = data_m, target = z) @@ -123,7 +125,7 @@ fit_nuisance_iivm = function(data, y, d, z, id = paste0("nuis_m_", z), backend = data_m, target = z, positive = "1") # } - + resampling_m = mlr3::rsmp("custom") resampling_m$instantiate(task_m, train_ids, test_ids) n_iters = resampling_m$iters @@ -156,10 +158,10 @@ fit_nuisance_iivm = function(data, y, d, z, resampling_g0$instantiate(task_g0, train_ids_0, test_ids) train_ids_g0 = lapply(1:n_iters, function(x) resampling_g0$train_set(x)) test_ids_g0 = lapply(1:n_iters, function(x) resampling_g0$test_set(x)) - + r_g0 = mlr3::resample(task_g0, ml_g0, resampling_g0, store_models = TRUE) g0_hat_list = lapply(r_g0$predictions(), function(x) x$response) - + # nuisance g1: E[Y|Z=1, X] task_g1 = mlr3::TaskRegr$new(id = paste0("nuis_g1_", z), backend = data_g, target = y) ml_g1 = ml_g$clone() @@ -184,12 +186,12 @@ fit_nuisance_iivm = function(data, y, d, z, if (always_takers == FALSE & never_takers == FALSE) { message("If there are no always-takers and no never-takers, ATE is estimated") } - + if (always_takers == FALSE) { lengths = lapply(test_ids, length) r0_hat_list = lapply(lengths, function(x) rep(0, x)) } - + if (always_takers == TRUE) { task_r0 = mlr3::TaskClassif$new( id = paste0("nuis_r0_", d), backend = data_r, @@ -198,7 +200,7 @@ fit_nuisance_iivm = function(data, y, d, z, if (!is.null(params_r)) { ml_r0$param_set$values = params_r } - + resampling_r0 = mlr3::rsmp("custom") # Train on subset with z == 0 (in each fold) only, predict for all test obs resampling_r0$instantiate(task_r0, train_ids_0, test_ids) @@ -207,12 +209,12 @@ fit_nuisance_iivm = function(data, y, d, z, r_r0 = mlr3::resample(task_r0, ml_r0, resampling_r0, store_models = TRUE) r0_hat_list = lapply(r_r0$predictions(), function(x) x$prob[, "1"]) } - + if (never_takers == FALSE) { lengths = lapply(test_ids, length) r1_hat_list = lapply(lengths, function(x) rep(1, x)) } - + if (never_takers == TRUE) { # nuisance m1: E[E|Z=1, 0] task_r1 = mlr3::TaskClassif$new( @@ -222,7 +224,7 @@ fit_nuisance_iivm = function(data, y, d, z, if (!is.null(params_r)) { ml_r1$param_set$values = params_r } - + resampling_r1 = mlr3::rsmp("custom") # Train on subset with z == 0 (in each fold) only, predict for all test obs resampling_r1$instantiate(task_r1, train_ids_1, test_ids) @@ -244,42 +246,44 @@ fit_nuisance_iivm = function(data, y, d, z, extract_iivm_preds = function(data, y, d, z, 
n_folds, smpls, all_preds, - trimming_threshold) { + trimming_threshold) { + test_ids = smpls$test_ids - + m_hat_list = all_preds$m_hat_list g0_hat_list = all_preds$g0_hat_list g1_hat_list = all_preds$g1_hat_list r0_hat_list = all_preds$r0_hat_list r1_hat_list = all_preds$r1_hat_list - + n = nrow(data) D = data[, d] Y = data[, y] Z = data[, z] - m_hat = g0_hat = g1_hat = r0_hat = r1_hat = rep(NA, n) - + m_hat = g0_hat = g1_hat = r0_hat = r1_hat = rep(NA_real_, n) + for (i in 1:n_folds) { test_index = test_ids[[i]] - + m_hat[test_index] = m_hat_list[[i]] g0_hat[test_index] = g0_hat_list[[i]] g1_hat[test_index] = g1_hat_list[[i]] r0_hat[test_index] = r0_hat_list[[i]] r1_hat[test_index] = r1_hat_list[[i]] } - + m_hat = trim_vec(m_hat, trimming_threshold) - - res = list(m_hat=m_hat, g0_hat=g0_hat, g1_hat=g1_hat, - r0_hat=r0_hat, r1_hat=r1_hat) + + res = list( + m_hat = m_hat, g0_hat = g0_hat, g1_hat = g1_hat, + r0_hat = r0_hat, r1_hat = r1_hat) return(res) } # Orthogonalized Estimation of Coefficient in irm orth_irmiv_dml = function(m_hat, g0_hat, g1_hat, r0_hat, r1_hat, d, y, z, score) { - theta = NA + theta = NA_real_ if (score == "LATE" | score == "partialling out") { theta = 1 / mean(r1_hat - r0_hat + z * (d - r1_hat) / m_hat - ((1 - z) * (d - r0_hat) / (1 - m_hat))) * @@ -310,12 +314,12 @@ var_irmiv = function(theta, m_hat, g0_hat, g1_hat, r0_hat, r1_hat, d, y, z, scor # Bootstrap Implementation for Interactive Instrumental Variable Regression Model bootstrap_irmiv = function(theta, se, data, y, d, z, n_folds, smpls, all_preds, - score, bootstrap, n_rep_boot, - n_rep=1, trimming_threshold = 1e-12) { + score, bootstrap, n_rep_boot, + n_rep = 1, trimming_threshold = 1e-12) { for (i_rep in 1:n_rep) { res = extract_iivm_preds(data, y, d, z, n_folds, - smpls[[i_rep]], all_preds[[i_rep]], - trimming_threshold = trimming_threshold) + smpls[[i_rep]], all_preds[[i_rep]], + trimming_threshold = trimming_threshold) m_hat = res$m_hat g0_hat = res$g0_hat g1_hat = res$g1_hat @@ -324,24 +328,25 @@ bootstrap_irmiv = function(theta, se, data, y, d, z, n_folds, smpls, all_preds, D = data[, d] Y = data[, y] Z = data[, z] - + if (score == "LATE") { - + psi = g1_hat - g0_hat + Z * (Y - g1_hat) / m_hat - (1 - Z) * (Y - g0_hat) / (1 - m_hat) - (r1_hat - r0_hat + Z * (D - r1_hat) / m_hat - (1 - Z) * (D - r0_hat) / (1 - m_hat)) * theta[i_rep] - + psi_a = -(r1_hat - r0_hat + Z * (D - r1_hat) / m_hat - - (1 - Z) * (D - r0_hat) / (1 - m_hat)) + - (1 - Z) * (D - r0_hat) / (1 - m_hat)) } else { stop("Inference framework for multiplier bootstrap unknown") } - + n = length(psi) weights = draw_bootstrap_weights(bootstrap, n_rep_boot, n) - this_res = functional_bootstrap(theta[i_rep], se[i_rep], psi, psi_a, n_folds, - smpls[[i_rep]], - n_rep_boot, weights) - if (i_rep==1) { + this_res = functional_bootstrap( + theta[i_rep], se[i_rep], psi, psi_a, n_folds, + smpls[[i_rep]], + n_rep_boot, weights) + if (i_rep == 1) { boot_res = this_res } else { boot_res$boot_coef = cbind(boot_res$boot_coef, this_res$boot_coef) diff --git a/tests/testthat/helper-12-p_adjust.R b/tests/testthat/helper-12-p_adjust.R index 969186ce..cb2836ed 100644 --- a/tests/testthat/helper-12-p_adjust.R +++ b/tests/testthat/helper-12-p_adjust.R @@ -67,7 +67,7 @@ p_adjust.DML = function(x, method = "RW", ...) 
{ # v = x$residuals$v # ev = e * v # Ev2 = colMeans(v^2) - # Omegahat = matrix(NA, ncol = k, nrow = k) + # Omegahat = matrix(NA_real_, ncol = k, nrow = k) # for (j in 1:k) { # for (l in 1:k) { # Omegahat[j, l] = Omegahat[l, j] = 1/(Ev2[j] * Ev2[l]) * mean(ev[, j] * ev[, l]) @@ -75,7 +75,7 @@ p_adjust.DML = function(x, method = "RW", ...) { # } # se = sqrt(diag(Omegahat)) # - # Beta_i = matrix(NA, ncol = k, nrow = B) + # Beta_i = matrix(NA_real_, ncol = k, nrow = B) # for (i in 1:B) { # Beta_i[i, ] = MASS::mvrnorm(mu = rep(0, k), Sigma = Omegahat/n) # } diff --git a/tests/testthat/helper-13-dml_pliv_partial_x.R b/tests/testthat/helper-13-dml_pliv_partial_x.R index 6b7e1241..87bc770a 100644 --- a/tests/testthat/helper-13-dml_pliv_partial_x.R +++ b/tests/testthat/helper-13-dml_pliv_partial_x.R @@ -1,35 +1,38 @@ dml_pliv_partial_x = function(data, y, d, z, - n_folds, - ml_g, ml_m, ml_r, - params, dml_procedure, score, - n_rep = 1, smpls=NULL, - params_g = NULL, params_m = NULL, params_r = NULL) { + n_folds, + ml_g, ml_m, ml_r, + params, dml_procedure, score, + n_rep = 1, smpls = NULL, + params_g = NULL, params_m = NULL, params_r = NULL) { + stopifnot(length(z) > 1) if (is.null(smpls)) { smpls = lapply(1:n_rep, function(x) sample_splitting(n_folds, data)) } - - all_thetas = all_ses = rep(NA, n_rep) + + all_thetas = all_ses = rep(NA_real_, n_rep) all_preds = list() - + for (i_rep in 1:n_rep) { this_smpl = smpls[[i_rep]] - - all_preds[[i_rep]] = fit_nuisance_pliv_partial_x(data, y, d, z, - ml_g, ml_m, ml_r, - this_smpl, - params_g, params_m, params_r) - - residuals = compute_pliv_partial_x_residuals(data, y, d, z, n_folds, - this_smpl, - all_preds[[i_rep]]) + + all_preds[[i_rep]] = fit_nuisance_pliv_partial_x( + data, y, d, z, + ml_g, ml_m, ml_r, + this_smpl, + params_g, params_m, params_r) + + residuals = compute_pliv_partial_x_residuals( + data, y, d, z, n_folds, + this_smpl, + all_preds[[i_rep]]) u_hat = residuals$u_hat w_hat = residuals$w_hat r_hat_tilde = residuals$r_hat_tilde - + # DML 1 if (dml_procedure == "dml1") { - thetas = vars = rep(NA, n_folds) + thetas = vars = rep(NA_real_, n_folds) for (i in 1:n_folds) { test_index = this_smpl$test_ids[[i]] orth_est = orth_pliv_partial_x_dml( @@ -47,19 +50,19 @@ dml_pliv_partial_x = function(data, y, d, z, score = score) all_thetas[i_rep] = orth_est$theta } - + all_ses[i_rep] = sqrt(var_pliv_partial_x( theta = all_thetas[i_rep], u_hat = u_hat, w_hat = w_hat, r_hat_tilde = r_hat_tilde, score = score)) } - + theta = stats::median(all_thetas) if (length(this_smpl$train_ids) > 1) { n = nrow(data) } else { n = length(this_smpl$test_ids[[1]]) } - se = se_repeated(all_ses*sqrt(n), all_thetas, theta)/sqrt(n) + se = se_repeated(all_ses * sqrt(n), all_thetas, theta) / sqrt(n) t = theta / se pval = 2 * stats::pnorm(-abs(t)) @@ -68,15 +71,16 @@ dml_pliv_partial_x = function(data, y, d, z, res = list( coef = theta, se = se, t = t, pval = pval, thetas = all_thetas, ses = all_ses, - all_preds=all_preds, smpls=smpls) + all_preds = all_preds, smpls = smpls) return(res) } fit_nuisance_pliv_partial_x = function(data, y, d, z, - ml_g, ml_m, ml_r, - smpls, - params_g, params_m, params_r) { + ml_g, ml_m, ml_r, + smpls, + params_g, params_m, params_r) { + train_ids = smpls$train_ids test_ids = smpls$test_ids @@ -84,17 +88,17 @@ fit_nuisance_pliv_partial_x = function(data, y, d, z, g_indx = names(data) != d & (names(data) %in% z == FALSE) data_g = data[, g_indx, drop = FALSE] task_g = mlr3::TaskRegr$new(id = paste0("nuis_g_", d), backend = data_g, target = y) - + 
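  # The nuisance fits in these helper files all share one cross-fitting
  # pattern: a "custom" mlr3 resampling is instantiated with the externally
  # drawn folds so that the out-of-fold predictions line up with the DML
  # sample splitting. A minimal sketch of the pattern, assuming a task `task`,
  # a learner `learner` and fold lists `train_ids`/`test_ids` as prepared
  # above:
  #
  #   rsmp_custom = mlr3::rsmp("custom")
  #   rsmp_custom$instantiate(task, train_ids, test_ids)
  #   rr = mlr3::resample(task, learner, rsmp_custom, store_models = TRUE)
  #   preds = lapply(rr$predictions(), function(x) x$response)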
resampling_g = mlr3::rsmp("custom") resampling_g$instantiate(task_g, train_ids, test_ids) - + if (!is.null(params_g)) { ml_g$param_set$values = params_g } - + r_g = mlr3::resample(task_g, ml_g, resampling_g, store_models = TRUE) g_hat_list = lapply(r_g$predictions(), function(x) x$response) - + # nuisance m: E[Z|X] n_z = length(z) m_hat_list = list() @@ -106,10 +110,10 @@ fit_nuisance_pliv_partial_x = function(data, y, d, z, if (!is.null(params_m)) { this_ml_m$param_set$values = params_m } - + resampling_m = mlr3::rsmp("custom") resampling_m$instantiate(task_m, train_ids, test_ids) - + r_m = mlr3::resample(task_m, this_ml_m, resampling_m, store_models = TRUE) m_hat_list[[i_z]] = lapply(r_m$predictions(), function(x) x$response) } @@ -121,17 +125,17 @@ fit_nuisance_pliv_partial_x = function(data, y, d, z, if (!is.null(params_r)) { ml_g$param_set$values = params_r } - + resampling_r = mlr3::rsmp("custom") resampling_r$instantiate(task_r, train_ids, test_ids) - + r_r = mlr3::resample(task_r, ml_r, resampling_r, store_models = TRUE) r_hat_list = lapply(r_r$predictions(), function(x) x$response) - + n = nrow(data) - r_hat_array = rep(NA, n) - m_hat_array = matrix(NA, nrow = n, ncol = n_z) - + r_hat_array = rep(NA_real_, n) + m_hat_array = matrix(NA_real_, nrow = n, ncol = n_z) + for (i_fold in seq_len(length(test_ids))) { test_index = test_ids[[i_fold]] r_hat_array[test_index] = r_hat_list[[i_fold]] @@ -141,9 +145,10 @@ fit_nuisance_pliv_partial_x = function(data, y, d, z, } D = data[, d] Z = data[, z] - r_hat_tilde = predict(lm(D - r_hat_array ~ 1 + as.matrix(Z - m_hat_array)), - Z - m_hat_array) - + r_hat_tilde = predict( + lm(D - r_hat_array ~ 1 + as.matrix(Z - m_hat_array)), + Z - m_hat_array) + all_preds = list( g_hat_list = g_hat_list, r_hat_list = r_hat_list, @@ -153,29 +158,30 @@ fit_nuisance_pliv_partial_x = function(data, y, d, z, } compute_pliv_partial_x_residuals = function(data, y, d, z, n_folds, smpls, - all_preds) { + all_preds) { + test_ids = smpls$test_ids g_hat_list = all_preds$g_hat_list r_hat_list = all_preds$r_hat_list r_hat_tilde = all_preds$r_hat_tilde - + n = nrow(data) D = data[, d] Y = data[, y] - - u_hat = w_hat = rep(NA, n) - + + u_hat = w_hat = rep(NA_real_, n) + for (i in 1:n_folds) { test_index = test_ids[[i]] - + g_hat = g_hat_list[[i]] r_hat = r_hat_list[[i]] u_hat[test_index] = Y[test_index] - g_hat w_hat[test_index] = D[test_index] - r_hat } - residuals = list(u_hat=u_hat, w_hat=w_hat, r_hat_tilde=r_hat_tilde) + residuals = list(u_hat = u_hat, w_hat = w_hat, r_hat_tilde = r_hat_tilde) return(residuals) } @@ -192,31 +198,33 @@ orth_pliv_partial_x_dml = function(u_hat, w_hat, r_hat_tilde, score) { var_pliv_partial_x = function(theta, u_hat, w_hat, r_hat_tilde, score) { stopifnot(score == "partialling out") var = mean(1 / length(u_hat) * 1 / (mean(r_hat_tilde * w_hat))^2 * - mean(((u_hat - w_hat * theta) * r_hat_tilde)^2)) + mean(((u_hat - w_hat * theta) * r_hat_tilde)^2)) return(c(var)) } bootstrap_pliv_partial_x = function(theta, se, data, y, d, z, n_folds, smpls, - all_preds, bootstrap, - n_rep_boot, n_rep=1) { + all_preds, bootstrap, + n_rep_boot, n_rep = 1) { for (i_rep in 1:n_rep) { - residuals = compute_pliv_partial_x_residuals(data, y, d, z, n_folds, - smpls[[i_rep]], - all_preds[[i_rep]]) + residuals = compute_pliv_partial_x_residuals( + data, y, d, z, n_folds, + smpls[[i_rep]], + all_preds[[i_rep]]) u_hat = residuals$u_hat w_hat = residuals$w_hat r_hat_tilde = residuals$r_hat_tilde - + psi = (u_hat - w_hat * theta[i_rep]) * r_hat_tilde - psi_a = - 
r_hat_tilde * w_hat + psi_a = -r_hat_tilde * w_hat n = length(psi) weights = draw_bootstrap_weights(bootstrap, n_rep_boot, n) - this_res = functional_bootstrap(theta[i_rep], se[i_rep], psi, psi_a, n_folds, - smpls[[i_rep]], - n_rep_boot, weights) - if (i_rep==1) { + this_res = functional_bootstrap( + theta[i_rep], se[i_rep], psi, psi_a, n_folds, + smpls[[i_rep]], + n_rep_boot, weights) + if (i_rep == 1) { boot_res = this_res } else { boot_res$boot_coef = cbind(boot_res$boot_coef, this_res$boot_coef) diff --git a/tests/testthat/helper-14-dml_pliv_partial_z.R b/tests/testthat/helper-14-dml_pliv_partial_z.R index ae7f3cc1..09b26f75 100644 --- a/tests/testthat/helper-14-dml_pliv_partial_z.R +++ b/tests/testthat/helper-14-dml_pliv_partial_z.R @@ -1,14 +1,15 @@ dml_pliv_partial_z = function(data, y, d, z, - n_folds, - ml_r, - dml_procedure, score, - n_rep = 1, smpls=NULL, - params_r = NULL) { + n_folds, + ml_r, + dml_procedure, score, + n_rep = 1, smpls = NULL, + params_r = NULL) { + if (is.null(smpls)) { smpls = lapply(1:n_rep, function(x) sample_splitting(n_folds, data)) } - - all_thetas = all_ses = rep(NA, n_rep) + + all_thetas = all_ses = rep(NA_real_, n_rep) all_preds = list() # check whether data contains Xs @@ -24,15 +25,17 @@ dml_pliv_partial_z = function(data, y, d, z, for (i_rep in 1:n_rep) { this_smpl = smpls[[i_rep]] - - all_preds[[i_rep]] = fit_nuisance_pliv_partial_z(data, y, d, z, - ml_r, - this_smpl, - params_r) - - residuals = compute_pliv_partial_z_residuals(data, y, d, z, n_folds, - this_smpl, - all_preds[[i_rep]]) + + all_preds[[i_rep]] = fit_nuisance_pliv_partial_z( + data, y, d, z, + ml_r, + this_smpl, + params_r) + + residuals = compute_pliv_partial_z_residuals( + data, y, d, z, n_folds, + this_smpl, + all_preds[[i_rep]]) r_hat = residuals$r_hat if (all(! 
(names(data) %in% c("y_tilde", "d_tilde")))) { @@ -45,7 +48,7 @@ dml_pliv_partial_z = function(data, y, d, z, # DML 1 if (dml_procedure == "dml1") { - thetas = vars = rep(NA, n_folds) + thetas = vars = rep(NA_real_, n_folds) for (i in 1:n_folds) { test_index = this_smpl$test_ids[[i]] orth_est = orth_pliv_partial_z_dml( @@ -68,19 +71,19 @@ dml_pliv_partial_z = function(data, y, d, z, score = score) all_thetas[i_rep] = orth_est$theta } - + all_ses[i_rep] = sqrt(var_pliv_partial_z( theta = all_thetas[i_rep], r_hat = r_hat, y = Y, d = D, score = score)) } - + theta = stats::median(all_thetas) if (length(this_smpl$train_ids) > 1) { n = nrow(data) } else { n = length(this_smpl$test_ids[[1]]) } - se = se_repeated(all_ses*sqrt(n), all_thetas, theta)/sqrt(n) + se = se_repeated(all_ses * sqrt(n), all_thetas, theta) / sqrt(n) t = theta / se pval = 2 * stats::pnorm(-abs(t)) @@ -96,9 +99,10 @@ dml_pliv_partial_z = function(data, y, d, z, } fit_nuisance_pliv_partial_z = function(data, y, d, z, - ml_r, - smpls, - params_r) { + ml_r, + smpls, + params_r) { + train_ids = smpls$train_ids test_ids = smpls$test_ids @@ -139,19 +143,20 @@ fit_nuisance_pliv_partial_z = function(data, y, d, z, } compute_pliv_partial_z_residuals = function(data, y, d, z, n_folds, smpls, - all_preds) { + all_preds) { + test_ids = smpls$test_ids r_hat_list = all_preds$r_hat_list n = nrow(data) - r_hat = rep(NA, n) - + r_hat = rep(NA_real_, n) + for (i in 1:n_folds) { test_index = test_ids[[i]] r_hat[test_index] = r_hat_list[[i]] } - residuals = list(r_hat=r_hat) + residuals = list(r_hat = r_hat) return(residuals) } @@ -168,18 +173,19 @@ orth_pliv_partial_z_dml = function(r_hat, y, d, score) { var_pliv_partial_z = function(theta, r_hat, y, d, score) { stopifnot(score == "partialling out") var = mean(1 / length(r_hat) * 1 / (mean(r_hat * d))^2 * - mean(((y - d * theta) * r_hat)^2)) + mean(((y - d * theta) * r_hat)^2)) return(c(var)) } bootstrap_pliv_partial_z = function(theta, se, data, y, d, z, n_folds, smpls, - all_preds, bootstrap, - n_rep_boot, n_rep=1) { + all_preds, bootstrap, + n_rep_boot, n_rep = 1) { for (i_rep in 1:n_rep) { - residuals = compute_pliv_partial_z_residuals(data, y, d, z, n_folds, - smpls[[i_rep]], - all_preds[[i_rep]]) + residuals = compute_pliv_partial_z_residuals( + data, y, d, z, n_folds, + smpls[[i_rep]], + all_preds[[i_rep]]) r_hat = residuals$r_hat if (all(! 
(names(data) %in% c("y_tilde", "d_tilde")))) { @@ -191,14 +197,15 @@ bootstrap_pliv_partial_z = function(theta, se, data, y, d, z, n_folds, smpls, } psi = (Y - D * theta[i_rep]) * r_hat - psi_a = - r_hat * D + psi_a = -r_hat * D n = length(psi) weights = draw_bootstrap_weights(bootstrap, n_rep_boot, n) - this_res = functional_bootstrap(theta[i_rep], se[i_rep], psi, psi_a, n_folds, - smpls[[i_rep]], - n_rep_boot, weights) - if (i_rep==1) { + this_res = functional_bootstrap( + theta[i_rep], se[i_rep], psi, psi_a, n_folds, + smpls[[i_rep]], + n_rep_boot, weights) + if (i_rep == 1) { boot_res = this_res } else { boot_res$boot_coef = cbind(boot_res$boot_coef, this_res$boot_coef) diff --git a/tests/testthat/helper-15-dml_pliv_partial_xz.R b/tests/testthat/helper-15-dml_pliv_partial_xz.R index 93f5e8ff..a6dc29a9 100644 --- a/tests/testthat/helper-15-dml_pliv_partial_xz.R +++ b/tests/testthat/helper-15-dml_pliv_partial_xz.R @@ -1,34 +1,37 @@ dml_pliv_partial_xz = function(data, y, d, z, - n_folds, - ml_g, ml_m, ml_r, - params, dml_procedure, score, - n_rep = 1, smpls=NULL, - params_g = NULL, params_m = NULL, params_r = NULL) { + n_folds, + ml_g, ml_m, ml_r, + params, dml_procedure, score, + n_rep = 1, smpls = NULL, + params_g = NULL, params_m = NULL, params_r = NULL) { + if (is.null(smpls)) { smpls = lapply(1:n_rep, function(x) sample_splitting(n_folds, data)) } - - all_thetas = all_ses = rep(NA, n_rep) + + all_thetas = all_ses = rep(NA_real_, n_rep) all_preds = list() - + for (i_rep in 1:n_rep) { this_smpl = smpls[[i_rep]] - - all_preds[[i_rep]] = fit_nuisance_pliv_partial_xz(data, y, d, z, - ml_g, ml_m, ml_r, - this_smpl, - params_g, params_m, params_r) - - residuals = compute_pliv_partial_xz_residuals(data, y, d, z, n_folds, - this_smpl, - all_preds[[i_rep]]) + + all_preds[[i_rep]] = fit_nuisance_pliv_partial_xz( + data, y, d, z, + ml_g, ml_m, ml_r, + this_smpl, + params_g, params_m, params_r) + + residuals = compute_pliv_partial_xz_residuals( + data, y, d, z, n_folds, + this_smpl, + all_preds[[i_rep]]) u_hat = residuals$u_hat v_hat = residuals$v_hat w_hat = residuals$w_hat - + # DML 1 if (dml_procedure == "dml1") { - thetas = vars = rep(NA, n_folds) + thetas = vars = rep(NA_real_, n_folds) for (i in 1:n_folds) { test_index = this_smpl$test_ids[[i]] orth_est = orth_pliv_partial_xz_dml( @@ -51,54 +54,55 @@ dml_pliv_partial_xz = function(data, y, d, z, score = score) all_thetas[i_rep] = orth_est$theta } - + all_ses[i_rep] = sqrt(var_pliv_partial_xz( theta = all_thetas[i_rep], u_hat = u_hat, v_hat = v_hat, w_hat = w_hat, score = score)) } - + theta = stats::median(all_thetas) if (length(this_smpl$train_ids) > 1) { n = nrow(data) } else { n = length(this_smpl$test_ids[[1]]) } - se = se_repeated(all_ses*sqrt(n), all_thetas, theta)/sqrt(n) - + se = se_repeated(all_ses * sqrt(n), all_thetas, theta) / sqrt(n) + t = theta / se pval = 2 * stats::pnorm(-abs(t)) - + names(theta) = names(se) = d res = list( coef = theta, se = se, t = t, pval = pval, thetas = all_thetas, ses = all_ses, - all_preds=all_preds, smpls=smpls) - + all_preds = all_preds, smpls = smpls) + return(res) } fit_nuisance_pliv_partial_xz = function(data, y, d, z, - ml_g, ml_m, ml_r, - smpls, - params_g, params_m, params_r) { + ml_g, ml_m, ml_r, + smpls, + params_g, params_m, params_r) { + train_ids = smpls$train_ids test_ids = smpls$test_ids - + # nuisance g: E[Y|X] g_indx = names(data) != d & (names(data) %in% z == FALSE) data_g = data[, g_indx, drop = FALSE] task_g = mlr3::TaskRegr$new(id = paste0("nuis_g_", d), backend = data_g, target 
= y) - + resampling_g = mlr3::rsmp("custom") resampling_g$instantiate(task_g, train_ids, test_ids) - + if (!is.null(params_g)) { ml_g$param_set$values = params_g } - + r_g = mlr3::resample(task_g, ml_g, resampling_g, store_models = TRUE) g_hat_list = lapply(r_g$predictions(), function(x) x$response) - + # nuisance m: E[D|XZ] m_indx = (names(data) != y) data_m = data[, m_indx, drop = FALSE] @@ -110,22 +114,22 @@ fit_nuisance_pliv_partial_xz = function(data, y, d, z, resampling_m = mlr3::rsmp("custom") resampling_m$instantiate(task_m, train_ids, test_ids) - + r_m = mlr3::resample(task_m, ml_m, resampling_m, store_models = TRUE) m_hat_list = lapply(r_m$predictions("test"), function(x) x$response) m_hat_list_train = lapply(r_m$predictions("train"), function(x) x$response) - + n = nrow(data) # nuisance r r_hat_list = list() for (i in seq_len(length(train_ids))) { - m_hat_train = rep(NA, n) + m_hat_train = rep(NA_real_, n) train_index = train_ids[[i]] m_hat_train[train_index] = m_hat_list_train[[i]] r_indx = names(data) != y & names(data) != d & (names(data) %in% z == FALSE) - data_r = setnafill(cbind(data[, r_indx, drop = FALSE], m_hat_train), - fill = -1111.11) # mlr3 does not allow NA's (values are not used) - task_r = mlr3::TaskRegr$new(id = paste0("nuis_r_", 'm_hat_train'), backend = data_r, target = 'm_hat_train') + data_r = setnafill(cbind(data[, r_indx, drop = FALSE], m_hat_train), + fill = -1111.11) # mlr3 does not allow NA's (values are not used) + task_r = mlr3::TaskRegr$new(id = paste0("nuis_r_", "m_hat_train"), backend = data_r, target = "m_hat_train") this_ml_r = ml_r$clone() if (!is.null(params_r)) { this_ml_r$param_set$values = params_r @@ -135,42 +139,43 @@ fit_nuisance_pliv_partial_xz = function(data, y, d, z, r_r = mlr3::resample(task_r, this_ml_r, resampling_r, store_models = TRUE) r_hat_list[[i]] = lapply(r_r$predictions(), function(x) x$response)[[1]] } - + all_preds = list( g_hat_list = g_hat_list, m_hat_list = m_hat_list, r_hat_list = r_hat_list) - + return(all_preds) } compute_pliv_partial_xz_residuals = function(data, y, d, z, n_folds, smpls, - all_preds) { + all_preds) { + test_ids = smpls$test_ids - + g_hat_list = all_preds$g_hat_list m_hat_list = all_preds$m_hat_list r_hat_list = all_preds$r_hat_list - + n = nrow(data) D = data[, d] Y = data[, y] - - u_hat = v_hat = w_hat = rep(NA, n) - + + u_hat = v_hat = w_hat = rep(NA_real_, n) + for (i in 1:n_folds) { test_index = test_ids[[i]] - + g_hat = g_hat_list[[i]] m_hat = m_hat_list[[i]] r_hat = r_hat_list[[i]] - + u_hat[test_index] = Y[test_index] - g_hat v_hat[test_index] = m_hat - r_hat w_hat[test_index] = D[test_index] - r_hat } - residuals = list(u_hat=u_hat, v_hat=v_hat, w_hat=w_hat) - + residuals = list(u_hat = u_hat, v_hat = v_hat, w_hat = w_hat) + return(residuals) } @@ -186,31 +191,33 @@ orth_pliv_partial_xz_dml = function(u_hat, v_hat, w_hat, score) { var_pliv_partial_xz = function(theta, u_hat, v_hat, w_hat, score) { stopifnot(score == "partialling out") var = mean(1 / length(u_hat) * 1 / (mean(v_hat * w_hat))^2 * - mean(((u_hat - w_hat * theta) * v_hat)^2)) + mean(((u_hat - w_hat * theta) * v_hat)^2)) return(c(var)) } bootstrap_pliv_partial_xz = function(theta, se, data, y, d, z, n_folds, smpls, - all_preds, bootstrap, - n_rep_boot, n_rep=1) { + all_preds, bootstrap, + n_rep_boot, n_rep = 1) { for (i_rep in 1:n_rep) { - residuals = compute_pliv_partial_xz_residuals(data, y, d, z, n_folds, - smpls[[i_rep]], - all_preds[[i_rep]]) + residuals = compute_pliv_partial_xz_residuals( + data, y, d, z, n_folds, + 
smpls[[i_rep]], + all_preds[[i_rep]]) u_hat = residuals$u_hat v_hat = residuals$v_hat w_hat = residuals$w_hat - + psi = (u_hat - w_hat * theta[i_rep]) * v_hat - psi_a = - v_hat * w_hat - + psi_a = -v_hat * w_hat + n = length(psi) weights = draw_bootstrap_weights(bootstrap, n_rep_boot, n) - this_res = functional_bootstrap(theta[i_rep], se[i_rep], psi, psi_a, n_folds, - smpls[[i_rep]], - n_rep_boot, weights) - if (i_rep==1) { + this_res = functional_bootstrap( + theta[i_rep], se[i_rep], psi, psi_a, n_folds, + smpls[[i_rep]], + n_rep_boot, weights) + if (i_rep == 1) { boot_res = this_res } else { boot_res$boot_coef = cbind(boot_res$boot_coef, this_res$boot_coef) diff --git a/tests/testthat/test-double_ml_active_bindings.R b/tests/testthat/test-double_ml_active_bindings.R index 2d78cf5b..8ef41e41 100644 --- a/tests/testthat/test-double_ml_active_bindings.R +++ b/tests/testthat/test-double_ml_active_bindings.R @@ -2,123 +2,123 @@ context("Unit tests for active bindings of class DoubleML") test_that("Not setable fields", { set.seed(3141) - dml_data = make_plr_CCDDHNR2018(n_obs=101) + dml_data = make_plr_CCDDHNR2018(n_obs = 101) ml_g = lrn("regr.ranger") ml_m = ml_g$clone() dml_plr = DoubleMLPLR$new(dml_data, ml_g, ml_m) - + msg = "can't set field all_coef" expect_error(dml_plr$all_coef <- 5, - regexp = msg) + regexp = msg) msg = "can't set field all_dml1_coef" expect_error(dml_plr$all_dml1_coef <- 5, - regexp = msg) + regexp = msg) msg = "can't set field all_se" expect_error(dml_plr$all_se <- 5, - regexp = msg) + regexp = msg) msg = "can't set field apply_cross_fitting" expect_error(dml_plr$apply_cross_fitting <- FALSE, - regexp = msg) + regexp = msg) msg = "can't set field boot_coef" expect_error(dml_plr$boot_coef <- 5, - regexp = msg) + regexp = msg) msg = "can't set field boot_t_stat" expect_error(dml_plr$boot_t_stat <- 5, - regexp = msg) + regexp = msg) msg = "can't set field coef" expect_error(dml_plr$coef <- 5, - regexp = msg) + regexp = msg) msg = "can't set field data" - expect_error(dml_plr$data <- 'abc', - regexp = msg) + expect_error(dml_plr$data <- "abc", + regexp = msg) msg = "can't set field dml_procedure" - expect_error(dml_plr$dml_procedure <- 'abc', - regexp = msg) + expect_error(dml_plr$dml_procedure <- "abc", + regexp = msg) msg = "can't set field draw_sample_splitting" expect_error(dml_plr$draw_sample_splitting <- FALSE, - regexp = msg) + regexp = msg) msg = "can't set field learner" - expect_error(dml_plr$learner <- 'abc', - regexp = msg) + expect_error(dml_plr$learner <- "abc", + regexp = msg) msg = "can't set field n_folds" expect_error(dml_plr$n_folds <- 5, - regexp = msg) + regexp = msg) msg = "can't set field n_rep" expect_error(dml_plr$n_rep <- 5, - regexp = msg) + regexp = msg) msg = "can't set field params" expect_error(dml_plr$params <- 5, - regexp = msg) + regexp = msg) msg = "can't set field psi" expect_error(dml_plr$psi <- 5, - regexp = msg) + regexp = msg) msg = "can't set field psi_a" expect_error(dml_plr$psi_a <- 5, - regexp = msg) + regexp = msg) msg = "can't set field psi_b" expect_error(dml_plr$psi_b <- 5, - regexp = msg) + regexp = msg) msg = "can't set field predictions" expect_error(dml_plr$predictions <- 5, - regexp = msg) + regexp = msg) msg = "can't set field pval" expect_error(dml_plr$pval <- 5, - regexp = msg) + regexp = msg) msg = "can't set field score" - expect_error(dml_plr$score <- 'abc', - regexp = msg) + expect_error(dml_plr$score <- "abc", + regexp = msg) msg = "can't set field se" expect_error(dml_plr$se <- 5, - regexp = msg) + regexp = 
msg) msg = "can't set field smpls" expect_error(dml_plr$smpls <- 5, - regexp = msg) + regexp = msg) msg = "can't set field t_stat" expect_error(dml_plr$t_stat <- 5, - regexp = msg) + regexp = msg) msg = "can't set field tuning_res" - expect_error(dml_plr$tuning_res <- list(a=5), - regexp = msg) - + expect_error(dml_plr$tuning_res <- list(a = 5), + regexp = msg) + dml_data = make_pliv_CHS2015(n_obs = 101) ml_g = lrn("regr.ranger") ml_m = ml_g$clone() ml_r = ml_g$clone() dml_pliv = DoubleMLPLIV$new(dml_data, ml_g, ml_m, ml_r) - + msg = "can't set field partialX" expect_error(dml_pliv$partialX <- FALSE, - regexp = msg) + regexp = msg) msg = "can't set field partialZ" expect_error(dml_pliv$partialZ <- FALSE, - regexp = msg) - + regexp = msg) + dml_data = make_irm_data(n_obs = 101) ml_g = lrn("regr.ranger") ml_m = lrn("classif.ranger") dml_irm = DoubleMLIRM$new(dml_data, ml_g, ml_m) - + msg = "can't set field trimming_rule" - expect_error(dml_irm$trimming_rule <- 'abc', - regexp = msg) + expect_error(dml_irm$trimming_rule <- "abc", + regexp = msg) msg = "can't set field trimming_threshold" expect_error(dml_irm$trimming_threshold <- 0.1, - regexp = msg) - + regexp = msg) + dml_data = make_iivm_data(n_obs = 101) ml_g = lrn("regr.ranger") ml_m = lrn("classif.ranger") ml_r = ml_m$clone() dml_iivm = DoubleMLIIVM$new(dml_data, ml_g, ml_m, ml_r) - + msg = "can't set field subgroups" - expect_error(dml_iivm$subgroups <- 'abc', - regexp = msg) + expect_error(dml_iivm$subgroups <- "abc", + regexp = msg) msg = "can't set field trimming_rule" - expect_error(dml_iivm$trimming_rule <- 'abc', - regexp = msg) + expect_error(dml_iivm$trimming_rule <- "abc", + regexp = msg) msg = "can't set field trimming_threshold" expect_error(dml_iivm$trimming_threshold <- 0.1, - regexp = msg) - } + regexp = msg) +} ) diff --git a/tests/testthat/test-double_ml_data.R b/tests/testthat/test-double_ml_data.R index 0430ee4b..c1362252 100644 --- a/tests/testthat/test-double_ml_data.R +++ b/tests/testthat/test-double_ml_data.R @@ -1,508 +1,523 @@ context("Unit tests for DoubleMLData") test_that("Unit tests for DoubleMLData", { - data = data_iivm$df - - # Input: Matrix and vectors - y = data[, "y"] # input: numeric - d = data[, "d"] # input: integer - z = data[, "z"] # input: integer - d2 = as.matrix(cbind(d, d * 2), ncol = 2) - colnames(d2) = c("d1", "d2") - - X_indx1 = names(data) %in% c("y", "d", "z") == FALSE - - check_indx1 = c(names(data)[X_indx1], "y", "d", "z") - X_dt1 = as.data.table(data)[, check_indx1, with = FALSE] - X = as.matrix(data[, X_indx1]) - - # With z and X - D1 = double_ml_data_from_matrix(X, y, d, z) - expect_equal(D1$data, X_dt1) - expect_identical(D1$data_model, X_dt1) - - # No X - D1b = double_ml_data_from_matrix(X = NULL, y, d, z) - X_dt1b = as.data.table(data)[, c("y", "d", "z")] - expect_equal(D1b$data, X_dt1b) - expect_identical(D1b$data_model, X_dt1b) - - # with multiple z - z_mult = cbind(z, d2[, 2]) - # with X - D1_multZ = double_ml_data_from_matrix(X, y, d, z_mult) - multZ_dt1 = as.data.table( - data.frame(data, "z1" = z, "z2" = d2[, 2]))[, c( - names(data)[X_indx1], - "y", "d", "z1", "z2"), + data = data_iivm$df + + # Input: Matrix and vectors + y = data[, "y"] # input: numeric + d = data[, "d"] # input: integer + z = data[, "z"] # input: integer + d2 = as.matrix(cbind(d, d * 2), ncol = 2) + colnames(d2) = c("d1", "d2") + + X_indx1 = names(data) %in% c("y", "d", "z") == FALSE + + check_indx1 = c(names(data)[X_indx1], "y", "d", "z") + X_dt1 = as.data.table(data)[, check_indx1, with = FALSE] + X = 
as.matrix(data[, X_indx1]) + + # With z and X + D1 = double_ml_data_from_matrix(X, y, d, z) + expect_equal(D1$data, X_dt1) + expect_identical(D1$data_model, X_dt1) + + # No X + D1b = double_ml_data_from_matrix(X = NULL, y, d, z) + X_dt1b = as.data.table(data)[, c("y", "d", "z")] + expect_equal(D1b$data, X_dt1b) + expect_identical(D1b$data_model, X_dt1b) + + # with multiple z + z_mult = cbind(z, d2[, 2]) + # with X + D1_multZ = double_ml_data_from_matrix(X, y, d, z_mult) + multZ_dt1 = as.data.table( + data.frame(data, "z1" = z, "z2" = d2[, 2]))[, c( + names(data)[X_indx1], + "y", "d", "z1", "z2"), + with = FALSE] + expect_equal(D1_multZ$data, multZ_dt1) + expect_equal(D1_multZ$data_model, multZ_dt1) + + # No X + D1b_multZ = double_ml_data_from_matrix(X = NULL, y, d, z_mult) + multZ_dt1b = as.data.table( + data.frame(data, "z1" = z, "z2" = d2[, 2]))[, c("y", "d", "z1", "z2"), with = FALSE] - expect_equal(D1_multZ$data, multZ_dt1) - expect_equal(D1_multZ$data_model, multZ_dt1) - - # No X - D1b_multZ = double_ml_data_from_matrix(X = NULL, y, d, z_mult) - multZ_dt1b = as.data.table( - data.frame(data, "z1" = z, "z2" = d2[, 2]))[, c("y", "d", "z1", "z2"), - with = FALSE] - expect_equal(D1b_multZ$data, multZ_dt1b) - expect_equal(D1b_multZ$data_model, multZ_dt1b) - - # No z, with X - D2 = double_ml_data_from_matrix(X, y, d) - check_indx2 = c(names(data)[X_indx1], "y", "d") - X_dt2 = as.data.table(data)[, check_indx2, with = FALSE] - expect_equal(D2$data, X_dt2) - expect_equal(D2$data_model, X_dt2) - - # No z, no X - D2b = double_ml_data_from_matrix(X = NULL, y, d) - X_dt2b = as.data.table(data)[, c("y", "d"), with = FALSE] - expect_equal(D2b$data, X_dt2b) - expect_equal(D2b$data_model, X_dt2b) - - # test with only 1 d, 1 X, 1 Z - X = as.matrix(data$X1) - D2_1X = double_ml_data_from_matrix(X, y, d) - X_dt21X = as.data.table(data)[, c("X1", "y", "d"), with = FALSE] - expect_equal(D2_1X$data, X_dt21X) - expect_equal(D2_1X$data_model, X_dt21X) - - # Two d variables - X_indx2 = names(data) %in% c("y", "d", "z", "d1", "d2") == FALSE - X2 = as.matrix(data[, X_indx2]) - D3 = double_ml_data_from_matrix(X2, y, d2) - - X_dt3 = as.data.table( - data.frame(data, "d1" = d2[, 1], "d2" = d2[, 2]))[, c( - names(data)[X_indx2], - "y", "d1", "d2"), - with = FALSE] - expect_equal(D3$data, X_dt3) - expect_equal(D3$data_model, X_dt3) - - # set_data_model - D3_setd_multd = D3$clone()$set_data_model("d2") - X_dt3_setd_multd = data.table::copy(X_dt3)[, c( - names(data)[X_indx2], - "y", "d2", "d1"), - with = FALSE] - expect_equal(D3_setd_multd$data, X_dt3) - expect_equal(D3_setd_multd$data_model, X_dt3_setd_multd) - - # Do not include other treatment var in nuisance part - D3_1d = double_ml_data_from_matrix(X2, y, d2, - use_other_treat_as_covariate = FALSE) - # Data backend - expect_equal(D3_1d$data, X_dt3) - - # Data model - X_dt31d = data.table::copy(X_dt3)[, d2 := NULL] - expect_equal(D3_1d$data_model, X_dt31d) - - # set_data_model - D3_setd = D3_1d$clone()$set_data_model("d2") - X_dt3_setd = data.table::copy(X_dt3)[, d1 := NULL] - expect_equal(D3_setd$data_model, X_dt3_setd) - - # Input: Data frame, assign columns by names - d_indx = "d" - y_indx = "y" - z_null = NULL - z_indx = "z" - X_cols1 = names(data[, X_indx1]) - - D4 = double_ml_data_from_data_frame(data, - x_cols = X_cols1, - y_col = y_indx, - d_cols = d_indx, - z_cols = z_indx) - - D4b = double_ml_data_from_data_frame(data, - x_cols = NULL, - y_col = y_indx, - d_cols = d_indx, - z_cols = z_indx) - - # with renamed variables - data_renamed = data - 
names(data_renamed) = c("outc", "exposure", "instr", paste0("Explr", 1:(ncol(data_renamed) - 3))) - Expl_cols1 = names(data_renamed[, X_indx1]) - D4_renamed = double_ml_data_from_data_frame(data_renamed, - x_cols = Expl_cols1, - y_col = "outc", - d_cols = "exposure", - z_cols = "instr") - - D5 = double_ml_data_from_data_frame(data, - x_cols = X_cols1, - y_col = y_indx, - d_cols = d_indx) - - D5b = double_ml_data_from_data_frame(data, - x_cols = NULL, - y_col = y_indx, - d_cols = d_indx) - - # test with only 1 d, 1 X, 1 Z - D5_1X = double_ml_data_from_data_frame(data, - x_cols = X_cols1[1], - y_col = y_indx, - d_cols = d_indx) - - D6 = double_ml_data_from_data_frame(data, - x_cols = X_cols1, - y_col = y_indx, - d_cols = d_indx, - z_cols = z_null) - - # Two d Variables - data2 = data.frame(data, d2) - d2_indx = colnames(d2) - - D7 = double_ml_data_from_data_frame(data2, - x_cols = X_cols1, - y_col = y_indx, - d_cols = d2_indx, - z_cols = z_null) - D7_setd_multd = D7$clone()$set_data_model("d2") - - D7_1d = double_ml_data_from_data_frame(data2, - x_cols = X_cols1, - y_col = y_indx, - d_cols = d2_indx, - z_cols = z_null, - use_other_treat_as_covariate = FALSE) - D7_setd = D7_1d$clone()$set_data_model("d2") - - expect_error(double_ml_data_from_data_frame(data), - regexp = "Assertion on 'y_col' failed: Must be of type 'character', not 'NULL'.") - - expect_equal(D1$data_model, D4$data_model) - expect_equal(D2$data_model, D5$data_model) - expect_equal(D2_1X$data_model, D5_1X$data_model) - expect_equal(D2$data_model, D6$data_model) - expect_equal(D3$data_model, D7$data_model) - expect_identical(D3_1d$data_model, D7_1d$data_model) - expect_identical(D3_setd_multd$data_model, D7_setd_multd$data_model) - expect_identical(D3_setd$data_model, D7_setd$data_model) - - # renaming / enforced names - expect_equivalent(D4$data, D4_renamed$data) - expect_equivalent(D4$data_model, D4_renamed$data_model) - - # NULL input for x_cols - expect_identical(D1$data[, c("y", "d", "z")], D1b$data) - expect_identical(D4$data, D4b$data) - expect_identical(D5$data[, sort(names(D5$data_model))], D5b$data[, sort(names(D5$data_model))]) - - # Instantiate DoubleMLData - data = data.table::data.table(data) - data2 = data.table::data.table(data2) - D8 = DoubleMLData$new(data, - x_cols = X_cols1, - y_col = y_indx, - d_cols = d_indx, - z_cols = z_indx) - - D8_noXcols = DoubleMLData$new(data, - y_col = y_indx, - d_cols = d_indx, - z_cols = z_indx) - # with renamed variables - data_renamed = data.table::copy(data) - data_renamed = data.table::setnames(data_renamed, c( - "outc", "exposure", "instr", - paste0("Explr", 1:(ncol(data_renamed) - 3)))) - - Expl_cols1 = names(data_renamed[, X_indx1, with = FALSE]) - D8_renamed = DoubleMLData$new(data_renamed, - x_cols = Expl_cols1, - y_col = "outc", - d_cols = "exposure", - z_cols = "instr") - - D9 = DoubleMLData$new(data, - x_cols = X_cols1, - y_col = y_indx, - d_cols = d_indx) - - # skip z if not X indx specified - data_noz = data[, which(names(data) != "z"), with = FALSE] - D9_noXcols = DoubleMLData$new(data_noz, - y_col = y_indx, - d_cols = d_indx) - - D9_1X = DoubleMLData$new(data, - x_cols = "X1", - y_col = y_indx, - d_cols = d_indx) - - D10 = DoubleMLData$new(data2, - x_cols = X_cols1, - y_col = y_indx, - d_cols = d2_indx, - z_cols = z_null) - - D10_setd = D10$clone()$set_data_model("d2") - - D10_1d = DoubleMLData$new(data2, - x_cols = X_cols1, - y_col = y_indx, - d_cols = d2_indx, - z_cols = z_null, - use_other_treat_as_covariate = FALSE) - - D10_1d_setd = 
D10_1d$clone()$set_data_model("d2") - - msg1 = "At least one variable/column is set as treatment variable \\(`d_cols`\\) and as a covariate \\(`x_cols`\\). Consider using parameter 'use_other_treat_as_covariate'." - - expect_error(double_ml_data_from_data_frame(data, - x_cols = X_cols1, - y_col = y_indx, - d_cols = c(d_indx, X_cols1[1]), - z_cols = z_indx), - regexp = msg1) - - msg2 = "At least one variable/column is set as covariate \\('x_cols'\\) and instrumental variable in 'z_cols'." - expect_error(double_ml_data_from_data_frame(data, - x_cols = X_cols1, - y_col = y_indx, - d_cols = d_indx, - z_cols = c(z_indx, X_cols1[1])), - regexp = msg2) - - msg3 = "y cannot be set as outcome variable `y_col` and covariate in 'x_cols'." - expect_error(double_ml_data_from_data_frame(data, - x_cols = c(y_indx, X_cols1), - y_col = y_indx, - d_cols = d_indx, - z_cols = z_indx), - regexp = msg3) - - msg4 = "At least one variable/column is set as treatment variable \\('d_cols'\\) and instrumental variable in 'z_cols'." - expect_error(double_ml_data_from_data_frame(data, - x_cols = X_cols1, - y_col = y_indx, - d_cols = c(z_indx, d_indx), - z_cols = z_indx), - regexp = msg4) - - msg5 = "y cannot be set as outcome variable 'y_col' and treatment variable in 'd_cols'." - expect_error(double_ml_data_from_data_frame(data, - x_cols = X_cols1, - y_col = y_indx, - d_cols = c(d_indx, y_indx), - z_cols = z_indx), - regexp = msg5) - - msg6 = "y cannot be set as outcome variable 'y_col' and instrumental variable in 'z_cols'." - expect_error(double_ml_data_from_data_frame(data, - x_cols = X_cols1, - y_col = y_indx, - d_cols = d_indx, - z_cols = c(z_indx, y_indx)), - regexp = msg6) - - msg7 = "Assertion on 'x_cols' failed: Contains duplicated values, position 21." - expect_error(double_ml_data_from_data_frame(data, - x_cols = rep(X_cols1, 2), - y_col = y_indx, - d_cols = d_indx, - z_cols = z_indx), - regexp = msg7) - - expect_identical(D1$data_model, D8$data_model) - expect_identical(D2$data_model, D9$data_model) - expect_identical(D2_1X$data_model, D9_1X$data_model) - expect_identical(D3$data_model, D10$data_model) - expect_identical(D3_setd_multd$data_model, D10_setd$data_model) - expect_identical(D3_1d$data_model, D10_1d$data_model) - expect_identical(D3_setd$data_model, D10_1d_setd$data_model) - - expect_identical(D4_renamed$data_model, D8_renamed$data_model) - expect_equivalent(D8$data_model, D8_renamed$data_model) - expect_identical(D8$data_model, D8_noXcols$data_model) - expect_identical(D9$data_model, D9_noXcols$data_model) - - # Exception handling - msg8 = "At least one variable/column is set as treatment variable \\(`d_cols`\\) and as a covariate \\(`x_cols`\\). Consider using parameter 'use_other_treat_as_covariate'." - expect_error(DoubleMLData$new(data, - x_cols = X_cols1, - y_col = y_indx, - d_cols = c(d_indx, X_cols1[1]), - z_cols = z_indx), - regexp = msg8) - - msg9 = "At least one variable/column is set as covariate \\('x_cols'\\) and instrumental variable in 'z_cols'." - expect_error(DoubleMLData$new(data, - x_cols = X_cols1, - y_col = y_indx, - d_cols = d_indx, - z_cols = c(z_indx, X_cols1[1])), - regexp = msg9) - - msg10 = "y cannot be set as outcome variable `y_col` and covariate in 'x_cols'." - expect_error(DoubleMLData$new(data, - x_cols = c(y_indx, X_cols1), - y_col = y_indx, - d_cols = d_indx, - z_cols = z_indx), - regexp = msg10) - - msg11 = "At least one variable/column is set as treatment variable \\('d_cols'\\) and instrumental variable in 'z_cols'." 
- expect_error(DoubleMLData$new(data, - x_cols = X_cols1, - y_col = y_indx, - d_cols = c(z_indx, d_indx), - z_cols = z_indx), - regexp = msg11) - - D11 = DoubleMLData$new(data, - x_cols = X_cols1, - y_col = y_indx, - d_cols = d_indx, - z_cols = z_indx) - - msg12 = "Assertion on 'treatment_var' failed: Must be a subset of \\{'d'\\}, but is \\{'X1'\\}." - expect_error(D11$set_data_model(X_cols1[1]), - regexp = msg12) - } + expect_equal(D1b_multZ$data, multZ_dt1b) + expect_equal(D1b_multZ$data_model, multZ_dt1b) + + # No z, with X + D2 = double_ml_data_from_matrix(X, y, d) + check_indx2 = c(names(data)[X_indx1], "y", "d") + X_dt2 = as.data.table(data)[, check_indx2, with = FALSE] + expect_equal(D2$data, X_dt2) + expect_equal(D2$data_model, X_dt2) + + # No z, no X + D2b = double_ml_data_from_matrix(X = NULL, y, d) + X_dt2b = as.data.table(data)[, c("y", "d"), with = FALSE] + expect_equal(D2b$data, X_dt2b) + expect_equal(D2b$data_model, X_dt2b) + + # test with only 1 d, 1 X, 1 Z + X = as.matrix(data$X1) + D2_1X = double_ml_data_from_matrix(X, y, d) + X_dt21X = as.data.table(data)[, c("X1", "y", "d"), with = FALSE] + expect_equal(D2_1X$data, X_dt21X) + expect_equal(D2_1X$data_model, X_dt21X) + + # Two d variables + X_indx2 = names(data) %in% c("y", "d", "z", "d1", "d2") == FALSE + X2 = as.matrix(data[, X_indx2]) + D3 = double_ml_data_from_matrix(X2, y, d2) + + X_dt3 = as.data.table( + data.frame(data, "d1" = d2[, 1], "d2" = d2[, 2]))[, c( + names(data)[X_indx2], + "y", "d1", "d2"), + with = FALSE] + expect_equal(D3$data, X_dt3) + expect_equal(D3$data_model, X_dt3) + + # set_data_model + D3_setd_multd = D3$clone()$set_data_model("d2") + X_dt3_setd_multd = data.table::copy(X_dt3)[, c( + names(data)[X_indx2], + "y", "d2", "d1"), + with = FALSE] + expect_equal(D3_setd_multd$data, X_dt3) + expect_equal(D3_setd_multd$data_model, X_dt3_setd_multd) + + # Do not include other treatment var in nuisance part + D3_1d = double_ml_data_from_matrix(X2, y, d2, + use_other_treat_as_covariate = FALSE) + # Data backend + expect_equal(D3_1d$data, X_dt3) + + # Data model + X_dt31d = data.table::copy(X_dt3)[, d2 := NULL] + expect_equal(D3_1d$data_model, X_dt31d) + + # set_data_model + D3_setd = D3_1d$clone()$set_data_model("d2") + X_dt3_setd = data.table::copy(X_dt3)[, d1 := NULL] + expect_equal(D3_setd$data_model, X_dt3_setd) + + # Input: Data frame, assign columns by names + d_indx = "d" + y_indx = "y" + z_null = NULL + z_indx = "z" + X_cols1 = names(data[, X_indx1]) + + D4 = double_ml_data_from_data_frame(data, + x_cols = X_cols1, + y_col = y_indx, + d_cols = d_indx, + z_cols = z_indx) + + D4b = double_ml_data_from_data_frame(data, + x_cols = NULL, + y_col = y_indx, + d_cols = d_indx, + z_cols = z_indx) + + # with renamed variables + data_renamed = data + names(data_renamed) = c("outc", "exposure", "instr", paste0("Explr", 1:(ncol(data_renamed) - 3))) + Expl_cols1 = names(data_renamed[, X_indx1]) + D4_renamed = double_ml_data_from_data_frame(data_renamed, + x_cols = Expl_cols1, + y_col = "outc", + d_cols = "exposure", + z_cols = "instr") + + D5 = double_ml_data_from_data_frame(data, + x_cols = X_cols1, + y_col = y_indx, + d_cols = d_indx) + + D5b = double_ml_data_from_data_frame(data, + x_cols = NULL, + y_col = y_indx, + d_cols = d_indx) + + # test with only 1 d, 1 X, 1 Z + D5_1X = double_ml_data_from_data_frame(data, + x_cols = X_cols1[1], + y_col = y_indx, + d_cols = d_indx) + + D6 = double_ml_data_from_data_frame(data, + x_cols = X_cols1, + y_col = y_indx, + d_cols = d_indx, + z_cols = z_null) + + # Two d 
Variables + data2 = data.frame(data, d2) + d2_indx = colnames(d2) + + D7 = double_ml_data_from_data_frame(data2, + x_cols = X_cols1, + y_col = y_indx, + d_cols = d2_indx, + z_cols = z_null) + D7_setd_multd = D7$clone()$set_data_model("d2") + + D7_1d = double_ml_data_from_data_frame(data2, + x_cols = X_cols1, + y_col = y_indx, + d_cols = d2_indx, + z_cols = z_null, + use_other_treat_as_covariate = FALSE) + D7_setd = D7_1d$clone()$set_data_model("d2") + + expect_error(double_ml_data_from_data_frame(data), + regexp = "Assertion on 'y_col' failed: Must be of type 'character', not 'NULL'.") + + expect_equal(D1$data_model, D4$data_model) + expect_equal(D2$data_model, D5$data_model) + expect_equal(D2_1X$data_model, D5_1X$data_model) + expect_equal(D2$data_model, D6$data_model) + expect_equal(D3$data_model, D7$data_model) + expect_identical(D3_1d$data_model, D7_1d$data_model) + expect_identical(D3_setd_multd$data_model, D7_setd_multd$data_model) + expect_identical(D3_setd$data_model, D7_setd$data_model) + + # renaming / enforced names + expect_equivalent(D4$data, D4_renamed$data) + expect_equivalent(D4$data_model, D4_renamed$data_model) + + # NULL input for x_cols + expect_identical(D1$data[, c("y", "d", "z")], D1b$data) + expect_identical(D4$data, D4b$data) + expect_identical(D5$data[, sort(names(D5$data_model))], D5b$data[, sort(names(D5$data_model))]) + + # Instantiate DoubleMLData + data = data.table::data.table(data) + data2 = data.table::data.table(data2) + D8 = DoubleMLData$new(data, + x_cols = X_cols1, + y_col = y_indx, + d_cols = d_indx, + z_cols = z_indx) + + D8_noXcols = DoubleMLData$new(data, + y_col = y_indx, + d_cols = d_indx, + z_cols = z_indx) + # with renamed variables + data_renamed = data.table::copy(data) + data_renamed = data.table::setnames(data_renamed, c( + "outc", "exposure", "instr", + paste0("Explr", 1:(ncol(data_renamed) - 3)))) + + Expl_cols1 = names(data_renamed[, X_indx1, with = FALSE]) + D8_renamed = DoubleMLData$new(data_renamed, + x_cols = Expl_cols1, + y_col = "outc", + d_cols = "exposure", + z_cols = "instr") + + D9 = DoubleMLData$new(data, + x_cols = X_cols1, + y_col = y_indx, + d_cols = d_indx) + + # skip z if not X indx specified + data_noz = data[, which(names(data) != "z"), with = FALSE] + D9_noXcols = DoubleMLData$new(data_noz, + y_col = y_indx, + d_cols = d_indx) + + D9_1X = DoubleMLData$new(data, + x_cols = "X1", + y_col = y_indx, + d_cols = d_indx) + + D10 = DoubleMLData$new(data2, + x_cols = X_cols1, + y_col = y_indx, + d_cols = d2_indx, + z_cols = z_null) + + D10_setd = D10$clone()$set_data_model("d2") + + D10_1d = DoubleMLData$new(data2, + x_cols = X_cols1, + y_col = y_indx, + d_cols = d2_indx, + z_cols = z_null, + use_other_treat_as_covariate = FALSE) + + D10_1d_setd = D10_1d$clone()$set_data_model("d2") + + msg1 = "At least one variable/column is set as treatment variable \\(`d_cols`\\) and as a covariate \\(`x_cols`\\). Consider using parameter 'use_other_treat_as_covariate'." + + expect_error(double_ml_data_from_data_frame(data, + x_cols = X_cols1, + y_col = y_indx, + d_cols = c(d_indx, X_cols1[1]), + z_cols = z_indx), + regexp = msg1) + + msg2 = "At least one variable/column is set as covariate \\('x_cols'\\) and instrumental variable in 'z_cols'." + expect_error(double_ml_data_from_data_frame(data, + x_cols = X_cols1, + y_col = y_indx, + d_cols = d_indx, + z_cols = c(z_indx, X_cols1[1])), + regexp = msg2) + + msg3 = "y cannot be set as outcome variable `y_col` and covariate in 'x_cols'." 
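  # Note: expect_error() treats `regexp` as a regular expression, hence the
  # \\-escaped parentheses and braces in the expected messages. A sketch of
  # verbatim matching instead, assuming an unescaped message string
  # `msg_literal` (a hypothetical variable), uses the `fixed` argument that
  # expect_error() forwards to expect_match():
  #
  #   expect_error(some_call(), regexp = msg_literal, fixed = TRUE)
  #   # (`some_call()` stands in for any of the constructor calls in this test)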
+ expect_error(double_ml_data_from_data_frame(data, + x_cols = c(y_indx, X_cols1), + y_col = y_indx, + d_cols = d_indx, + z_cols = z_indx), + regexp = msg3) + + msg4 = "At least one variable/column is set as treatment variable \\('d_cols'\\) and instrumental variable in 'z_cols'." + expect_error(double_ml_data_from_data_frame(data, + x_cols = X_cols1, + y_col = y_indx, + d_cols = c(z_indx, d_indx), + z_cols = z_indx), + regexp = msg4) + + msg5 = "y cannot be set as outcome variable 'y_col' and treatment variable in 'd_cols'." + expect_error(double_ml_data_from_data_frame(data, + x_cols = X_cols1, + y_col = y_indx, + d_cols = c(d_indx, y_indx), + z_cols = z_indx), + regexp = msg5) + + msg6 = "y cannot be set as outcome variable 'y_col' and instrumental variable in 'z_cols'." + expect_error(double_ml_data_from_data_frame(data, + x_cols = X_cols1, + y_col = y_indx, + d_cols = d_indx, + z_cols = c(z_indx, y_indx)), + regexp = msg6) + + msg7 = "Assertion on 'x_cols' failed: Contains duplicated values, position 21." + expect_error(double_ml_data_from_data_frame(data, + x_cols = rep(X_cols1, 2), + y_col = y_indx, + d_cols = d_indx, + z_cols = z_indx), + regexp = msg7) + + expect_identical(D1$data_model, D8$data_model) + expect_identical(D2$data_model, D9$data_model) + expect_identical(D2_1X$data_model, D9_1X$data_model) + expect_identical(D3$data_model, D10$data_model) + expect_identical(D3_setd_multd$data_model, D10_setd$data_model) + expect_identical(D3_1d$data_model, D10_1d$data_model) + expect_identical(D3_setd$data_model, D10_1d_setd$data_model) + + expect_identical(D4_renamed$data_model, D8_renamed$data_model) + expect_equivalent(D8$data_model, D8_renamed$data_model) + expect_identical(D8$data_model, D8_noXcols$data_model) + expect_identical(D9$data_model, D9_noXcols$data_model) + + # Exception handling + msg8 = "At least one variable/column is set as treatment variable \\(`d_cols`\\) and as a covariate \\(`x_cols`\\). Consider using parameter 'use_other_treat_as_covariate'." + expect_error(DoubleMLData$new(data, + x_cols = X_cols1, + y_col = y_indx, + d_cols = c(d_indx, X_cols1[1]), + z_cols = z_indx), + regexp = msg8) + + msg9 = "At least one variable/column is set as covariate \\('x_cols'\\) and instrumental variable in 'z_cols'." + expect_error(DoubleMLData$new(data, + x_cols = X_cols1, + y_col = y_indx, + d_cols = d_indx, + z_cols = c(z_indx, X_cols1[1])), + regexp = msg9) + + msg10 = "y cannot be set as outcome variable `y_col` and covariate in 'x_cols'." + expect_error(DoubleMLData$new(data, + x_cols = c(y_indx, X_cols1), + y_col = y_indx, + d_cols = d_indx, + z_cols = z_indx), + regexp = msg10) + + msg11 = "At least one variable/column is set as treatment variable \\('d_cols'\\) and instrumental variable in 'z_cols'." + expect_error(DoubleMLData$new(data, + x_cols = X_cols1, + y_col = y_indx, + d_cols = c(z_indx, d_indx), + z_cols = z_indx), + regexp = msg11) + + D11 = DoubleMLData$new(data, + x_cols = X_cols1, + y_col = y_indx, + d_cols = d_indx, + z_cols = z_indx) + + msg12 = "Assertion on 'treatment_var' failed: Must be a subset of \\{'d'\\}, but is \\{'X1'\\}." 
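  # set_data_model() only accepts variables registered as treatments via
  # d_cols ("d" for D11), so passing the first covariate must fail with the
  # subset assertion below. Sketch of the failing call (X_cols1[1] is "X1"):
  #
  #   D11$set_data_model("X1") # a covariate, not a treatment: raises msg12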
+ expect_error(D11$set_data_model(X_cols1[1]), + regexp = msg12) +} ) test_that("Unit tests for invalid data", { # PLR with IV - msg = paste("Incompatible data.\\n", - "z has been set as instrumental variable\\(s\\).\\n", - "To fit a partially linear IV regression model use", - "DoubleMLPLIV instead of DoubleMLPLR.") + msg = paste( + "Incompatible data.\\n", + "z has been set as instrumental variable\\(s\\).\\n", + "To fit a partially linear IV regression model use", + "DoubleMLPLIV instead of DoubleMLPLR.") expect_error(DoubleMLPLR$new( data = data_pliv$dml_data, - ml_g = mlr3::lrn('regr.rpart'), - ml_m = mlr3::lrn('regr.rpart')), - regexp = msg) + ml_g = mlr3::lrn("regr.rpart"), + ml_m = mlr3::lrn("regr.rpart")), + regexp = msg) # PLIV without IV - msg = paste("Incompatible data.\\n", - "At least one variable must be set as instrumental variable.\\n", - "To fit a partially linear regression model without instrumental", - "variable\\(s\\) use DoubleMLPLR instead of DoubleMLPLIV.") + msg = paste( + "Incompatible data.\\n", + "At least one variable must be set as instrumental variable.\\n", + "To fit a partially linear regression model without instrumental", + "variable\\(s\\) use DoubleMLPLR instead of DoubleMLPLIV.") expect_error(DoubleMLPLIV$new( data = data_plr$dml_data, - ml_g = mlr3::lrn('regr.rpart'), - ml_m = mlr3::lrn('regr.rpart'), - ml_r = mlr3::lrn('regr.rpart')), - regexp = msg) + ml_g = mlr3::lrn("regr.rpart"), + ml_m = mlr3::lrn("regr.rpart"), + ml_r = mlr3::lrn("regr.rpart")), + regexp = msg) # IRM with IV - msg = paste("Incompatible data.\\n", - "z has been set as instrumental variable\\(s\\).\\n", - "To fit an interactive IV regression model use", - "DoubleMLIIVM instead of DoubleMLIRM.") + msg = paste( + "Incompatible data.\\n", + "z has been set as instrumental variable\\(s\\).\\n", + "To fit an interactive IV regression model use", + "DoubleMLIIVM instead of DoubleMLIRM.") expect_error(DoubleMLIRM$new( data = data_iivm$dml_data, - ml_g = mlr3::lrn('regr.rpart'), - ml_m = mlr3::lrn('classif.rpart', predict_type = "prob")), - regexp = msg) - + ml_g = mlr3::lrn("regr.rpart"), + ml_m = mlr3::lrn("classif.rpart", predict_type = "prob")), + regexp = msg) + # IIVM without IV - msg = paste("Incompatible data.\\n", - "To fit an IIVM model with DoubleML", - "exactly one binary variable with values 0 and 1", - "needs to be specified as instrumental variable.") + msg = paste( + "Incompatible data.\\n", + "To fit an IIVM model with DoubleML", + "exactly one binary variable with values 0 and 1", + "needs to be specified as instrumental variable.") expect_error(double_mlplr_obj <- DoubleMLIIVM$new( data = data_irm$dml_data, - ml_g = mlr3::lrn('regr.rpart'), - ml_m = mlr3::lrn('classif.rpart', predict_type = "prob")), - regexp = msg) + ml_g = mlr3::lrn("regr.rpart"), + ml_m = mlr3::lrn("classif.rpart", predict_type = "prob")), + regexp = msg) # non-binary D for IRM df = data_irm$df - df['d'] = df['d']*5 + df["d"] = df["d"] * 5 dml_data = double_ml_data_from_data_frame(df, y_col = "y", d_cols = "d") - msg = paste("Incompatible data.\\n", - "To fit an IRM model with DoubleML", - "exactly one binary variable with values 0 and 1", - "needs to be specified as treatment variable.") + msg = paste( + "Incompatible data.\\n", + "To fit an IRM model with DoubleML", + "exactly one binary variable with values 0 and 1", + "needs to be specified as treatment variable.") expect_error(DoubleMLIRM$new( data = dml_data, - ml_g = mlr3::lrn('regr.rpart'), - ml_m = mlr3::lrn('classif.rpart', predict_type = 
"prob")), - regexp = msg) - + ml_g = mlr3::lrn("regr.rpart"), + ml_m = mlr3::lrn("classif.rpart", predict_type = "prob")), + regexp = msg) + # non-binary D for IIVM df = data_iivm$df - df['d'] = df['d']*5 - dml_data = double_ml_data_from_data_frame(df, y_col = "y", - d_cols = "d", - z_cols = "z") - msg = paste("Incompatible data.\\n", - "To fit an IIVM model with DoubleML", - "exactly one binary variable with values 0 and 1", - "needs to be specified as treatment variable.") + df["d"] = df["d"] * 5 + dml_data = double_ml_data_from_data_frame(df, + y_col = "y", + d_cols = "d", + z_cols = "z") + msg = paste( + "Incompatible data.\\n", + "To fit an IIVM model with DoubleML", + "exactly one binary variable with values 0 and 1", + "needs to be specified as treatment variable.") expect_error(DoubleMLIIVM$new( data = dml_data, - ml_g = mlr3::lrn('regr.rpart'), - ml_m = mlr3::lrn('classif.rpart', predict_type = "prob")), - regexp = msg) - + ml_g = mlr3::lrn("regr.rpart"), + ml_m = mlr3::lrn("classif.rpart", predict_type = "prob")), + regexp = msg) + # non-binary Z for IIVM df = data_iivm$df - df['z'] = df['z']*5 - dml_data = double_ml_data_from_data_frame(df, y_col = "y", - d_cols = "d", - z_cols = "z") - msg = paste("Incompatible data.\\n", - "To fit an IIVM model with DoubleML", - "exactly one binary variable with values 0 and 1", - "needs to be specified as instrumental variable.") + df["z"] = df["z"] * 5 + dml_data = double_ml_data_from_data_frame(df, + y_col = "y", + d_cols = "d", + z_cols = "z") + msg = paste( + "Incompatible data.\\n", + "To fit an IIVM model with DoubleML", + "exactly one binary variable with values 0 and 1", + "needs to be specified as instrumental variable.") expect_error(DoubleMLIIVM$new( data = dml_data, - ml_g = mlr3::lrn('regr.rpart'), - ml_m = mlr3::lrn('classif.rpart', predict_type = "prob")), - regexp = msg) + ml_g = mlr3::lrn("regr.rpart"), + ml_m = mlr3::lrn("classif.rpart", predict_type = "prob")), + regexp = msg) # multiple D for IRM df = data_irm$df - dml_data = double_ml_data_from_data_frame(df, y_col = "y", - d_cols = c("d", "X1")) - msg = paste("Incompatible data.\\n", - "To fit an IRM model with DoubleML", - "exactly one binary variable with values 0 and 1", - "needs to be specified as treatment variable.") + dml_data = double_ml_data_from_data_frame(df, + y_col = "y", + d_cols = c("d", "X1")) + msg = paste( + "Incompatible data.\\n", + "To fit an IRM model with DoubleML", + "exactly one binary variable with values 0 and 1", + "needs to be specified as treatment variable.") expect_error(DoubleMLIRM$new( data = dml_data, - ml_g = mlr3::lrn('regr.rpart'), - ml_m = mlr3::lrn('classif.rpart', predict_type = "prob")), - regexp = msg) + ml_g = mlr3::lrn("regr.rpart"), + ml_m = mlr3::lrn("classif.rpart", predict_type = "prob")), + regexp = msg) # multiple D for IIVM df = data_iivm$df - dml_data = double_ml_data_from_data_frame(df, y_col = "y", - d_cols = c("d", "X1"), - z_cols = "z") - msg = paste("Incompatible data.\\n", - "To fit an IIVM model with DoubleML", - "exactly one binary variable with values 0 and 1", - "needs to be specified as treatment variable.") + dml_data = double_ml_data_from_data_frame(df, + y_col = "y", + d_cols = c("d", "X1"), + z_cols = "z") + msg = paste( + "Incompatible data.\\n", + "To fit an IIVM model with DoubleML", + "exactly one binary variable with values 0 and 1", + "needs to be specified as treatment variable.") expect_error(DoubleMLIIVM$new( data = dml_data, - ml_g = mlr3::lrn('regr.rpart'), - ml_m = mlr3::lrn('classif.rpart', 
predict_type = "prob")), - regexp = msg) - + ml_g = mlr3::lrn("regr.rpart"), + ml_m = mlr3::lrn("classif.rpart", predict_type = "prob")), + regexp = msg) + # multiple Z for IIVM df = data_iivm$df - dml_data = double_ml_data_from_data_frame(df, y_col = "y", - d_cols = "d", - z_cols = c("z", "X1")) - msg = paste("Incompatible data.\\n", - "To fit an IIVM model with DoubleML", - "exactly one binary variable with values 0 and 1", - "needs to be specified as instrumental variable.") + dml_data = double_ml_data_from_data_frame(df, + y_col = "y", + d_cols = "d", + z_cols = c("z", "X1")) + msg = paste( + "Incompatible data.\\n", + "To fit an IIVM model with DoubleML", + "exactly one binary variable with values 0 and 1", + "needs to be specified as instrumental variable.") expect_error(DoubleMLIIVM$new( data = dml_data, - ml_g = mlr3::lrn('regr.rpart'), - ml_m = mlr3::lrn('classif.rpart', predict_type = "prob")), - regexp = msg) + ml_g = mlr3::lrn("regr.rpart"), + ml_m = mlr3::lrn("classif.rpart", predict_type = "prob")), + regexp = msg) } -) \ No newline at end of file +) diff --git a/tests/testthat/test-double_ml_data_active_bindings.R b/tests/testthat/test-double_ml_data_active_bindings.R index cd54b3a6..9d73aab3 100644 --- a/tests/testthat/test-double_ml_data_active_bindings.R +++ b/tests/testthat/test-double_ml_data_active_bindings.R @@ -3,331 +3,341 @@ context("Unit tests for active bindings of class DoubleMLData") test_that("x_cols setter", { set.seed(3141) df = as.data.frame(matrix(rnorm(20), ncol = 4)) - names(df) = c('yy', 'dd', 'xx1', 'xx2') - - dml_data = double_ml_data_from_data_frame(df, y_col='yy', d_cols='dd') + names(df) = c("yy", "dd", "xx1", "xx2") + + dml_data = double_ml_data_from_data_frame(df, y_col = "yy", d_cols = "dd") expect_equal(dml_data$x_cols, c("xx1", "xx2")) - - dml_data = make_plr_CCDDHNR2018(n_obs=100) + + dml_data = make_plr_CCDDHNR2018(n_obs = 100) orig_x_cols = dml_data$x_cols - + # check that after changing the x_cols, the data_model gets updated - data_comp = as.data.frame(dml_data$data_model)[, c(c('X1', 'X11', 'X13'), dml_data$y_col, dml_data$d_cols)] - dml_data$x_cols = c('X1', 'X11', 'X13') + data_comp = as.data.frame(dml_data$data_model)[, c(c("X1", "X11", "X13"), dml_data$y_col, dml_data$d_cols)] + dml_data$x_cols = c("X1", "X11", "X13") expect_equal(as.data.frame(dml_data$data_model), data_comp) - - msg = paste0("Assertion on 'x_cols' failed: Must be a subset of", - " \\{'X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','X11','X12','X13','X14','X15','X16','X17','X18','X19','X20','y','d'\\},", - " but is \\{'X1','X11','A13'\\}.") - expect_error(dml_data$x_cols <- c('X1', 'X11', 'A13'), - regexp = msg) - + + msg = paste0( + "Assertion on 'x_cols' failed: Must be a subset of", + " \\{'X1','X2','X3','X4','X5','X6','X7','X8','X9','X10','X11','X12','X13','X14','X15','X16','X17','X18','X19','X20','y','d'\\},", + " but is \\{'X1','X11','A13'\\}.") + expect_error(dml_data$x_cols <- c("X1", "X11", "A13"), + regexp = msg) + msg = "Assertion on 'x_cols' failed: Must be of type 'character', not 'double'." 
expect_error(dml_data$x_cols <- 5, - regexp = msg) - + regexp = msg) + # check single covariate - dml_data$x_cols = 'X13' - expect_equal(dml_data$x_cols, 'X13') - + dml_data$x_cols = "X13" + expect_equal(dml_data$x_cols, "X13") + # check setting None brings us back to orig_x_cols dml_data$x_cols = NULL expect_equal(dml_data$x_cols, orig_x_cols) - } +} ) test_that("d_cols setter", { set.seed(3141) - dml_data = make_plr_CCDDHNR2018(n_obs=101, return_type = "data.frame") - df = dml_data[,1:10] - names(df) = c(paste0("X", 1:7), c('y', 'd1', 'd2')) - dml_data = double_ml_data_from_data_frame(df, y_col = 'y', - d_cols = c('d1', 'd2'), - x_cols = paste0("X", 1:7)) + dml_data = make_plr_CCDDHNR2018(n_obs = 101, return_type = "data.frame") + df = dml_data[, 1:10] + names(df) = c(paste0("X", 1:7), c("y", "d1", "d2")) + dml_data = double_ml_data_from_data_frame(df, + y_col = "y", + d_cols = c("d1", "d2"), + x_cols = paste0("X", 1:7)) expect_equal(dml_data$n_obs, 101) - + # check that after changing d_cols, the data_model gets updated - data_comp = df[, c(paste0("X", 1:7), c('y', 'd2', 'd1'))] - dml_data$d_cols = c('d2', 'd1') + data_comp = df[, c(paste0("X", 1:7), c("y", "d2", "d1"))] + dml_data$d_cols = c("d2", "d1") expect_equal(as.data.frame(dml_data$data_model), data_comp) - - msg = paste0("Assertion on 'd_cols' failed: Must be a subset of", - " \\{'X1','X2','X3','X4','X5','X6','X7','y','d1','d2'\\},", - " but is \\{'d1','d13'\\}.") - expect_error(dml_data$d_cols <- c('d1', 'd13'), - regexp = msg) - - msg = paste0("Assertion on 'd_cols' failed: Must be a subset of", - " \\{'X1','X2','X3','X4','X5','X6','X7','y','d1','d2'\\},", - " but is \\{'d13'\\}.") - expect_error(dml_data$d_cols <- 'd13', - regexp = msg) - + + msg = paste0( + "Assertion on 'd_cols' failed: Must be a subset of", + " \\{'X1','X2','X3','X4','X5','X6','X7','y','d1','d2'\\},", + " but is \\{'d1','d13'\\}.") + expect_error(dml_data$d_cols <- c("d1", "d13"), + regexp = msg) + + msg = paste0( + "Assertion on 'd_cols' failed: Must be a subset of", + " \\{'X1','X2','X3','X4','X5','X6','X7','y','d1','d2'\\},", + " but is \\{'d13'\\}.") + expect_error(dml_data$d_cols <- "d13", + regexp = msg) + msg = "Assertion on 'd_cols' failed: Must be of type 'character', not 'double'." 
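  # Unlike y_col, d_cols may name several treatments; n_treat simply tracks
  # its length, e.g. (sketch):
  #   dml_data$d_cols = c("d2", "d1")  # two treatments, n_treat == 2
  #   dml_data$d_cols = "d2"           # back to one, n_treat == 1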
expect_error(dml_data$d_cols <- 5, - regexp = msg) - + regexp = msg) + # check single treatment variable - dml_data$d_cols = 'd2' - expect_equal(dml_data$d_cols, 'd2') + dml_data$d_cols = "d2" + expect_equal(dml_data$d_cols, "d2") expect_equal(dml_data$n_treat, 1) - } +} ) test_that("z_cols setter", { set.seed(3141) - dml_data = make_plr_CCDDHNR2018(n_obs=101, return_type = "data.frame") - df = dml_data[,1:10] - names(df) = c(paste0("X", 1:4), paste0("z", 1:3), c('y', 'd1', 'd2')) - dml_data = double_ml_data_from_data_frame(df, y_col = 'y', - d_cols = c('d1', 'd2'), - x_cols = paste0("X", 1:4), - z_cols = paste0("z", 1:3)) + dml_data = make_plr_CCDDHNR2018(n_obs = 101, return_type = "data.frame") + df = dml_data[, 1:10] + names(df) = c(paste0("X", 1:4), paste0("z", 1:3), c("y", "d1", "d2")) + dml_data = double_ml_data_from_data_frame(df, + y_col = "y", + d_cols = c("d1", "d2"), + x_cols = paste0("X", 1:4), + z_cols = paste0("z", 1:3)) expect_equal(dml_data$n_obs, 101) expect_equal(dml_data$n_treat, 2) expect_equal(dml_data$n_instr, 3) - + # check that z_cols gets updated - dml_data$z_cols = c('z1', 'z2') - expect_equal(dml_data$z_cols, c('z1', 'z2')) + dml_data$z_cols = c("z1", "z2") + expect_equal(dml_data$z_cols, c("z1", "z2")) expect_equal(dml_data$n_instr, 2) - - msg = paste0("Assertion on 'z_cols' failed: Must be a subset of", - " \\{'X1','X2','X3','X4','z1','z2','z3','y','d1','d2'\\},", - " but is \\{'z1','a13'\\}.") - expect_error(dml_data$z_cols <- c('z1', 'a13'), - regexp = msg) - - msg = paste0("Assertion on 'z_cols' failed: Must be a subset of", - " \\{'X1','X2','X3','X4','z1','z2','z3','y','d1','d2'\\},", - " but is \\{'a13'\\}.") - expect_error(dml_data$z_cols <- 'a13', - regexp = msg) - + + msg = paste0( + "Assertion on 'z_cols' failed: Must be a subset of", + " \\{'X1','X2','X3','X4','z1','z2','z3','y','d1','d2'\\},", + " but is \\{'z1','a13'\\}.") + expect_error(dml_data$z_cols <- c("z1", "a13"), + regexp = msg) + + msg = paste0( + "Assertion on 'z_cols' failed: Must be a subset of", + " \\{'X1','X2','X3','X4','z1','z2','z3','y','d1','d2'\\},", + " but is \\{'a13'\\}.") + expect_error(dml_data$z_cols <- "a13", + regexp = msg) + msg = "Assertion on 'z_cols' failed: Must be of type 'character', not 'double'." 
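  # z_cols is the one role that can be dropped entirely: setting it to NULL
  # empties the instrument set (n_instr == 0, checked below) rather than
  # restoring a default, unlike x_cols = NULL in the test above, which falls
  # back to orig_x_cols.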
expect_error(dml_data$z_cols <- 5, - regexp = msg) - + regexp = msg) + # check single instrument - dml_data$z_cols = 'z2' - expect_equal(dml_data$z_cols, 'z2') + dml_data$z_cols = "z2" + expect_equal(dml_data$z_cols, "z2") expect_equal(dml_data$n_instr, 1) - + # check NULL dml_data$z_cols = NULL expect_equal(dml_data$z_cols, NULL) expect_equal(dml_data$n_instr, 0) - } +} ) test_that("y_col setter", { set.seed(3141) - dml_data = make_plr_CCDDHNR2018(n_obs=101, return_type = "data.frame") - df = dml_data[,1:10] - names(df) = c(paste0("X", 1:7), c('y', 'y123', 'd')) + dml_data = make_plr_CCDDHNR2018(n_obs = 101, return_type = "data.frame") + df = dml_data[, 1:10] + names(df) = c(paste0("X", 1:7), c("y", "y123", "d")) dt = data.table::as.data.table(df) - dml_data = DoubleMLData$new(dt, y_col = 'y', - d_cols = 'd', - x_cols = paste0("X", 1:7)) + dml_data = DoubleMLData$new(dt, + y_col = "y", + d_cols = "d", + x_cols = paste0("X", 1:7)) # with the following wrapper the column 'y123' gets removed - #dml_data = double_ml_data_from_data_frame(df, y_col = 'y', + # dml_data = double_ml_data_from_data_frame(df, y_col = 'y', # d_cols = 'd', # x_cols = paste0("X", 1:7)) expect_equal(dml_data$n_obs, 101) - + # check that after changing d_cols, the data_model gets updated - data_comp = df[, c(paste0("X", 1:7), c('y123', 'd'))] - dml_data$y_col = 'y123' - expect_equal(dml_data$y_col, 'y123') + data_comp = df[, c(paste0("X", 1:7), c("y123", "d"))] + dml_data$y_col = "y123" + expect_equal(dml_data$y_col, "y123") expect_equal(as.data.frame(dml_data$data_model), data_comp) - - msg = paste0("Assertion on 'y_col' failed: Must be a subset of", - " \\{'X1','X2','X3','X4','X5','X6','X7','y','y123','d'\\},", - " but is \\{'d13'\\}.") - expect_error(dml_data$y_col <- 'd13', - regexp = msg) - + + msg = paste0( + "Assertion on 'y_col' failed: Must be a subset of", + " \\{'X1','X2','X3','X4','X5','X6','X7','y','y123','d'\\},", + " but is \\{'d13'\\}.") + expect_error(dml_data$y_col <- "d13", + regexp = msg) + msg = "Assertion on 'y_col' failed: Must be of type 'character', not 'double'." 
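  # y_col takes exactly one column name (a length-1 character), so a vector
  # of names should be rejected just like the wrong type below, e.g.
  # (sketch):
  #   try(dml_data$y_col <- c("y", "y123"))  # length assertion fires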
expect_error(dml_data$y_col <- 5, - regexp = msg) - } + regexp = msg) +} ) test_that("Tests for use_other_treat_as_covariate", { set.seed(3141) - dml_data = make_plr_CCDDHNR2018(n_obs=101, return_type = "data.frame") - df = dml_data[,1:10] - names(df) = c(paste0("X", 1:7), c('y', 'd1', 'd2')) - dml_data = double_ml_data_from_data_frame(df, y_col = 'y', - d_cols = c('d1', 'd2'), - x_cols = paste0("X", 1:7), - use_other_treat_as_covariate = TRUE) + dml_data = make_plr_CCDDHNR2018(n_obs = 101, return_type = "data.frame") + df = dml_data[, 1:10] + names(df) = c(paste0("X", 1:7), c("y", "d1", "d2")) + dml_data = double_ml_data_from_data_frame(df, + y_col = "y", + d_cols = c("d1", "d2"), + x_cols = paste0("X", 1:7), + use_other_treat_as_covariate = TRUE) expect_equal(dml_data$n_obs, 101) - - dml_data$set_data_model('d1') - expect_equal(dml_data$treat_col, 'd1') - expect_equal(dml_data$other_treat_cols, 'd2') - - dml_data$set_data_model('d2') - expect_equal(dml_data$treat_col, 'd2') - expect_equal(dml_data$other_treat_cols, 'd1') - + + dml_data$set_data_model("d1") + expect_equal(dml_data$treat_col, "d1") + expect_equal(dml_data$other_treat_cols, "d2") + + dml_data$set_data_model("d2") + expect_equal(dml_data$treat_col, "d2") + expect_equal(dml_data$other_treat_cols, "d1") + dml_data$use_other_treat_as_covariate = FALSE expect_equal(dml_data$other_treat_cols, NULL) - - dml_data$set_data_model('d1') - expect_equal(dml_data$treat_col, 'd1') + + dml_data$set_data_model("d1") + expect_equal(dml_data$treat_col, "d1") expect_equal(dml_data$other_treat_cols, NULL) - - dml_data$set_data_model('d2') - expect_equal(dml_data$treat_col, 'd2') + + dml_data$set_data_model("d2") + expect_equal(dml_data$treat_col, "d2") expect_equal(dml_data$other_treat_cols, NULL) - + msg = "Assertion on 'use_other_treat_as_covariate' failed: Must be of type 'logical', not 'double'." expect_error(dml_data$use_other_treat_as_covariate <- 5, - regexp = msg) - + regexp = msg) + msg = "Assertion on 'treatment_var' failed: Must be a subset of \\{'d1','d2'\\}, but is \\{'d3'\\}." - expect_error(dml_data$set_data_model('d3'), - regexp = msg) - + expect_error(dml_data$set_data_model("d3"), + regexp = msg) + msg = "Assertion on 'treatment_var' failed: Must have length <= 1, but has length 2." - expect_error(dml_data$set_data_model(c('d1', 'd2')), - regexp = msg) - } + expect_error(dml_data$set_data_model(c("d1", "d2")), + regexp = msg) +} ) test_that("Disjoint sets", { set.seed(3141) df = as.data.frame(matrix(rnorm(20), ncol = 4)) - names(df) = c('yy', 'dd1', 'xx1', 'xx2') + names(df) = c("yy", "dd1", "xx1", "xx2") dt = data.table::as.data.table(df) - - msg = paste0("At least one variable/column is set as treatment variable \\(`d_cols`\\) and as a covariate \\(`x_cols`\\).", - " Consider using parameter 'use_other_treat_as_covariate'.") + + msg = paste0( + "At least one variable/column is set as treatment variable \\(`d_cols`\\) and as a covariate \\(`x_cols`\\).", + " Consider using parameter 'use_other_treat_as_covariate'.") expect_error(DoubleMLData$new(dt, - y_col='yy', - d_cols=c('dd1', 'xx1'), - x_cols=c('xx1', 'xx2')), - regexp = msg) - + y_col = "yy", + d_cols = c("dd1", "xx1"), + x_cols = c("xx1", "xx2")), + regexp = msg) + msg = "yy cannot be set as outcome variable 'y_col' and treatment variable in 'd_cols'." 
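  # The constructor checks every pairwise overlap between the y, d, x and z
  # roles; the d/x case above is the only one that points to the
  # 'use_other_treat_as_covariate' mechanism instead of a plain rejection.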
expect_error(DoubleMLData$new(dt, - y_col='yy', - d_cols=c('dd1', 'yy'), - x_cols=c('xx1', 'xx2')), - regexp = msg) - + y_col = "yy", + d_cols = c("dd1", "yy"), + x_cols = c("xx1", "xx2")), + regexp = msg) + msg = "yy cannot be set as outcome variable `y_col` and covariate in 'x_cols'." expect_error(DoubleMLData$new(dt, - y_col='yy', - d_cols='dd1', - x_cols=c('xx1', 'yy', 'xx2')), - regexp = msg) - + y_col = "yy", + d_cols = "dd1", + x_cols = c("xx1", "yy", "xx2")), + regexp = msg) + msg = "yy cannot be set as outcome variable 'y_col' and instrumental variable in 'z_cols'." expect_error(DoubleMLData$new(dt, - y_col='yy', - d_cols='dd1', - x_cols=c('xx1', 'xx2'), - z_cols='yy'), - regexp = msg) - + y_col = "yy", + d_cols = "dd1", + x_cols = c("xx1", "xx2"), + z_cols = "yy"), + regexp = msg) + msg = "At least one variable/column is set as treatment variable \\('d_cols'\\) and instrumental variable in 'z_cols'." expect_error(DoubleMLData$new(dt, - y_col='yy', - d_cols='dd1', - x_cols=c('xx1', 'xx2'), - z_cols='dd1'), - regexp = msg) - + y_col = "yy", + d_cols = "dd1", + x_cols = c("xx1", "xx2"), + z_cols = "dd1"), + regexp = msg) + msg = "At least one variable/column is set as covariate \\('x_cols'\\) and instrumental variable in 'z_cols'." expect_error(DoubleMLData$new(dt, - y_col='yy', - d_cols='dd1', - x_cols=c('xx1', 'xx2'), - z_cols='xx2'), - regexp = msg) - } + y_col = "yy", + d_cols = "dd1", + x_cols = c("xx1", "xx2"), + z_cols = "xx2"), + regexp = msg) +} ) test_that("Test duplicates", { set.seed(3141) - dt = make_plr_CCDDHNR2018(n_obs=101, return_type = "data.table") - dml_data = DoubleMLData$new(dt, y_col = 'y', - d_cols = 'd') - + dt = make_plr_CCDDHNR2018(n_obs = 101, return_type = "data.table") + dml_data = DoubleMLData$new(dt, + y_col = "y", + d_cols = "d") + msg = "Assertion on 'd_cols' failed: Contains duplicated values, position 2." expect_error(DoubleMLData$new(dt, - y_col='y', - d_cols=c('d', 'd', 'X1'), - x_cols=c('X3', 'X2')), - regexp = msg) - expect_error(dml_data$d_cols <- c('d', 'd', 'X1'), - regexp = msg) - + y_col = "y", + d_cols = c("d", "d", "X1"), + x_cols = c("X3", "X2")), + regexp = msg) + expect_error(dml_data$d_cols <- c("d", "d", "X1"), + regexp = msg) + msg = "Assertion on 'x_cols' failed: Contains duplicated values, position 3." expect_error(DoubleMLData$new(dt, - y_col='y', - d_cols='d', - x_cols=c('X3', 'X2', 'X3')), - regexp = msg) - expect_error(dml_data$x_cols <- c('X3', 'X2', 'X3'), - regexp = msg) - + y_col = "y", + d_cols = "d", + x_cols = c("X3", "X2", "X3")), + regexp = msg) + expect_error(dml_data$x_cols <- c("X3", "X2", "X3"), + regexp = msg) + msg = "Assertion on 'z_cols' failed: Contains duplicated values, position 3." expect_error(DoubleMLData$new(dt, - y_col='y', - d_cols='d', - x_cols=c('X3', 'X2'), - z_cols=c('X15', 'X12', 'X12', 'X15')), - regexp = msg) - expect_error(dml_data$z_cols <- c('X15', 'X12', 'X12', 'X15'), - regexp = msg) - + y_col = "y", + d_cols = "d", + x_cols = c("X3", "X2"), + z_cols = c("X15", "X12", "X12", "X15")), + regexp = msg) + expect_error(dml_data$z_cols <- c("X15", "X12", "X12", "X15"), + regexp = msg) + df = as.data.frame(matrix(rnorm(20), ncol = 5)) - names(df) = c('y', 'd', 'X3', 'X2', 'y') + names(df) = c("y", "d", "X3", "X2", "y") dt = data.table::as.data.table(df) msg = "Assertion on 'names\\(data\\)' failed: Contains duplicated values, position 5." 
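  # Uniqueness is enforced on two levels: within each role vector (d_cols,
  # x_cols, z_cols above) and, as below, among the column names of the
  # data.table itself.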
expect_error(DoubleMLData$new(dt, - y_col='y', - d_cols='d', - x_cols=c('X3', 'X2')), - regexp = msg) - - - } + y_col = "y", + d_cols = "d", + x_cols = c("X3", "X2")), + regexp = msg) +} ) test_that("Not setable fields", { set.seed(3141) - dml_data = make_plr_CCDDHNR2018(n_obs=101) - + dml_data = make_plr_CCDDHNR2018(n_obs = 101) + msg = "can't set field all_variables" - expect_error(dml_data$all_variables <- 'abc', - regexp = msg) + expect_error(dml_data$all_variables <- "abc", + regexp = msg) msg = "can't set field data" - expect_error(dml_data$data <- 'abc', - regexp = msg) + expect_error(dml_data$data <- "abc", + regexp = msg) msg = "can't set field data_model" - expect_error(dml_data$data_model <- 'abc', - regexp = msg) - + expect_error(dml_data$data_model <- "abc", + regexp = msg) + msg = "can't set field n_instr" expect_error(dml_data$n_instr <- 5, - regexp = msg) + regexp = msg) msg = "can't set field n_obs" expect_error(dml_data$n_obs <- 5, - regexp = msg) + regexp = msg) msg = "can't set field n_treat" expect_error(dml_data$n_treat <- 5, - regexp = msg) - + regexp = msg) + msg = "can't set field other_treat_cols" - expect_error(dml_data$other_treat_cols <- 'abc', - regexp = msg) + expect_error(dml_data$other_treat_cols <- "abc", + regexp = msg) msg = "can't set field treat_col" - expect_error(dml_data$treat_col <- 'abc', - regexp = msg) - } + expect_error(dml_data$treat_col <- "abc", + regexp = msg) +} ) diff --git a/tests/testthat/test-double_ml_iivm.R b/tests/testthat/test-double_ml_iivm.R index a709c2cd..022e771e 100644 --- a/tests/testthat/test-double_ml_iivm.R +++ b/tests/testthat/test-double_ml_iivm.R @@ -38,14 +38,14 @@ patrick::with_parameters_test_that("Unit tests for IIVM:", dml_procedure = dml_procedure, score = score) theta = iivm_hat$coef se = iivm_hat$se - + boot_theta = bootstrap_irmiv(iivm_hat$thetas, iivm_hat$ses, - data_iivm$df, - y = "y", d = "d", z = "z", - n_folds = 5, smpls = iivm_hat$smpls, - all_preds= iivm_hat$all_preds, - score = score, - bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef + data_iivm$df, + y = "y", d = "d", z = "z", + n_folds = 5, smpls = iivm_hat$smpls, + all_preds = iivm_hat$all_preds, + score = score, + bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef set.seed(3141) double_mliivm_obj = DoubleMLIIVM$new( @@ -62,7 +62,7 @@ patrick::with_parameters_test_that("Unit tests for IIVM:", se_obj = double_mliivm_obj$se # bootstrap - double_mliivm_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mliivm_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = double_mliivm_obj$boot_coef # at the moment the object result comes without a name diff --git a/tests/testthat/test-double_ml_iivm_parameter_passing.R b/tests/testthat/test-double_ml_iivm_parameter_passing.R index 2bb6b72c..50880d90 100644 --- a/tests/testthat/test-double_ml_iivm_parameter_passing.R +++ b/tests/testthat/test-double_ml_iivm_parameter_passing.R @@ -34,35 +34,35 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IIVM (oo n_rep_boot = 498 n_folds = 2 n_rep = 3 - + learner_pars = get_default_mlmethod_iivm(learner) - + set.seed(3141) iivm_hat = dml_irmiv(data_iivm$df, - y = "y", d = "d", z = "z", - n_folds = n_folds, - n_rep = n_rep, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r, predict_type = "prob"), - params_g = learner_pars$params$params_g, - params_m = 
learner_pars$params$params_m, - params_r = learner_pars$params$params_r, - dml_procedure = dml_procedure, score = score, - trimming_threshold = trimming_threshold) + y = "y", d = "d", z = "z", + n_folds = n_folds, + n_rep = n_rep, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r, predict_type = "prob"), + params_g = learner_pars$params$params_g, + params_m = learner_pars$params$params_m, + params_r = learner_pars$params$params_r, + dml_procedure = dml_procedure, score = score, + trimming_threshold = trimming_threshold) theta = iivm_hat$coef se = iivm_hat$se - + boot_theta = bootstrap_irmiv(iivm_hat$thetas, iivm_hat$ses, - data_iivm$df, - y = "y", d = "d", z = "z", - n_folds = n_folds, - n_rep = n_rep, - smpls = iivm_hat$smpls, - all_preds= iivm_hat$all_preds, - score = score, - bootstrap = "normal", n_rep_boot = n_rep_boot, - trimming_threshold = trimming_threshold)$boot_coef + data_iivm$df, + y = "y", d = "d", z = "z", + n_folds = n_folds, + n_rep = n_rep, + smpls = iivm_hat$smpls, + all_preds = iivm_hat$all_preds, + score = score, + bootstrap = "normal", n_rep_boot = n_rep_boot, + trimming_threshold = trimming_threshold)$boot_coef set.seed(3141) dml_iivm_obj = DoubleMLIIVM$new( @@ -100,14 +100,14 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IIVM (oo dml_iivm_obj$fit() theta_obj = dml_iivm_obj$coef se_obj = dml_iivm_obj$se - + # bootstrap - dml_iivm_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + dml_iivm_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = dml_iivm_obj$boot_coef - + expect_equal(theta, theta_obj, tolerance = 1e-8) expect_equal(se, se_obj, tolerance = 1e-8) - + expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) } ) @@ -117,7 +117,7 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IIVM (no n_folds = 2 learner_pars = get_default_mlmethod_iivm(learner) - + # Passing for non-cross-fitting case set.seed(3141) my_task = Task$new("help task", "regr", data_iivm$df) @@ -125,33 +125,34 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IIVM (no train_ids = list(my_sampling$train_set(1)) test_ids = list(my_sampling$test_set(1)) smpls = list(list(train_ids = train_ids, test_ids = test_ids)) - + iivm_hat = dml_irmiv(data_iivm$df, - y = "y", d = "d", z = "z", - n_folds = 1, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r, predict_type = "prob"), - params_g = learner_pars$params$params_g, - params_m = learner_pars$params$params_m, - params_r = learner_pars$params$params_r, - dml_procedure = dml_procedure, score = score, - trimming_threshold = trimming_threshold, - smpls=smpls) + y = "y", d = "d", z = "z", + n_folds = 1, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r, predict_type = "prob"), + params_g = learner_pars$params$params_g, + params_m = learner_pars$params$params_m, + params_r = learner_pars$params$params_r, + dml_procedure = dml_procedure, score = score, + trimming_threshold = trimming_threshold, + smpls = smpls) theta = iivm_hat$coef se = iivm_hat$se - + set.seed(3141) - dml_iivm_obj = DoubleMLIIVM$new(data = data_iivm$dml_data, - n_folds = 
n_folds, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r, predict_type = "prob"), - dml_procedure = dml_procedure, - score = score, - trimming_threshold = trimming_threshold, - apply_cross_fitting = FALSE) - + dml_iivm_obj = DoubleMLIIVM$new( + data = data_iivm$dml_data, + n_folds = n_folds, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r, predict_type = "prob"), + dml_procedure = dml_procedure, + score = score, + trimming_threshold = trimming_threshold, + apply_cross_fitting = FALSE) + dml_iivm_obj$set_ml_nuisance_params( learner = "ml_m", treat_var = "d", @@ -172,11 +173,11 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IIVM (no learner = "ml_r1", treat_var = "d", params = learner_pars$params$params_r) - + dml_iivm_obj$fit() theta_obj = dml_iivm_obj$coef se_obj = dml_iivm_obj$se - + expect_equal(theta, theta_obj, tolerance = 1e-8) expect_equal(se, se_obj, tolerance = 1e-8) } @@ -187,20 +188,21 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IIVM (fo n_rep_boot = 498 n_folds = 2 n_rep = 3 - + learner_pars = get_default_mlmethod_iivm(learner) - + set.seed(3141) - dml_iivm_obj = DoubleMLIIVM$new(data = data_iivm$dml_data, - n_folds = n_folds, - n_rep = n_rep, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r, predict_type = "prob"), - dml_procedure = dml_procedure, - score = score, - trimming_threshold = trimming_threshold) - + dml_iivm_obj = DoubleMLIIVM$new( + data = data_iivm$dml_data, + n_folds = n_folds, + n_rep = n_rep, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r, predict_type = "prob"), + dml_procedure = dml_procedure, + score = score, + trimming_threshold = trimming_threshold) + dml_iivm_obj$set_ml_nuisance_params( learner = "ml_m", treat_var = "d", @@ -221,27 +223,28 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IIVM (fo learner = "ml_r1", treat_var = "d", params = learner_pars$params$params_r) - + dml_iivm_obj$fit() theta = dml_iivm_obj$coef se = dml_iivm_obj$se - + params_g_fold_wise = rep(list(rep(list(learner_pars$params$params_g), n_folds)), n_rep) params_m_fold_wise = rep(list(rep(list(learner_pars$params$params_m), n_folds)), n_rep) params_r_fold_wise = rep(list(rep(list(learner_pars$params$params_r), n_folds)), n_rep) - + set.seed(3141) - - dml_iivm_fold_wise = DoubleMLIIVM$new(data = data_iivm$dml_data, - n_folds = n_folds, - n_rep = n_rep, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r, predict_type = "prob"), - dml_procedure = dml_procedure, - score = score, - trimming_threshold = trimming_threshold) - + + dml_iivm_fold_wise = DoubleMLIIVM$new( + data = data_iivm$dml_data, + n_folds = n_folds, + n_rep = n_rep, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r, predict_type = "prob"), + 
dml_procedure = dml_procedure, + score = score, + trimming_threshold = trimming_threshold) + dml_iivm_fold_wise$set_ml_nuisance_params( learner = "ml_m", treat_var = "d", @@ -267,11 +270,11 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IIVM (fo treat_var = "d", params = params_r_fold_wise, set_fold_specific = TRUE) - + dml_iivm_fold_wise$fit() theta_fold_wise = dml_iivm_fold_wise$coef se_fold_wise = dml_iivm_fold_wise$se - + expect_equal(theta, theta_fold_wise, tolerance = 1e-8) expect_equal(se, se_fold_wise, tolerance = 1e-8) } @@ -281,37 +284,39 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IIVM (de .cases = test_cases, { n_folds = 2 n_rep = 3 - + params_g = list(cp = 0.01, minsplit = 20) # this are defaults params_m = list(cp = 0.01, minsplit = 20) # this are defaults params_r = list(cp = 0.01, minsplit = 20) # this are defaults - + set.seed(3141) - dml_iivm_default = DoubleMLIIVM$new(data = data_iivm$dml_data, - n_folds = n_folds, - n_rep = n_rep, - ml_g = lrn('regr.rpart'), - ml_m = lrn('classif.rpart', predict_type = "prob"), - ml_r = lrn('classif.rpart', predict_type = "prob"), - dml_procedure = dml_procedure, - score = score, - trimming_threshold = trimming_threshold) - + dml_iivm_default = DoubleMLIIVM$new( + data = data_iivm$dml_data, + n_folds = n_folds, + n_rep = n_rep, + ml_g = lrn("regr.rpart"), + ml_m = lrn("classif.rpart", predict_type = "prob"), + ml_r = lrn("classif.rpart", predict_type = "prob"), + dml_procedure = dml_procedure, + score = score, + trimming_threshold = trimming_threshold) + dml_iivm_default$fit() theta_default = dml_iivm_default$coef se_default = dml_iivm_default$se - + set.seed(3141) - dml_iivm_obj = DoubleMLIIVM$new(data = data_iivm$dml_data, - n_folds = n_folds, - n_rep = n_rep, - ml_g = lrn('regr.rpart'), - ml_m = lrn('classif.rpart', predict_type = "prob"), - ml_r = lrn('classif.rpart', predict_type = "prob"), - dml_procedure = dml_procedure, - score = score, - trimming_threshold = trimming_threshold) - + dml_iivm_obj = DoubleMLIIVM$new( + data = data_iivm$dml_data, + n_folds = n_folds, + n_rep = n_rep, + ml_g = lrn("regr.rpart"), + ml_m = lrn("classif.rpart", predict_type = "prob"), + ml_r = lrn("classif.rpart", predict_type = "prob"), + dml_procedure = dml_procedure, + score = score, + trimming_threshold = trimming_threshold) + dml_iivm_obj$set_ml_nuisance_params( learner = "ml_m", treat_var = "d", @@ -332,11 +337,11 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IIVM (de learner = "ml_r1", treat_var = "d", params = params_r) - + dml_iivm_obj$fit() theta = dml_iivm_obj$coef se = dml_iivm_obj$se - + expect_equal(theta, theta_default, tolerance = 1e-8) expect_equal(se, se_default, tolerance = 1e-8) } diff --git a/tests/testthat/test-double_ml_iivm_trim.R b/tests/testthat/test-double_ml_iivm_trim.R index da228872..1f229760 100644 --- a/tests/testthat/test-double_ml_iivm_trim.R +++ b/tests/testthat/test-double_ml_iivm_trim.R @@ -29,24 +29,24 @@ patrick::with_parameters_test_that("Unit tests for IIVM:", set.seed(3141) iivm_hat = dml_irmiv(data_iivm$df, - y = "y", d = "d", z = "z", - n_folds = 5, - ml_g = learner_pars$ml_g$clone(), - ml_m = learner_pars$ml_m$clone(), - ml_r = learner_pars$ml_r$clone(), - dml_procedure = dml_procedure, score = score, - trimming_threshold = trimming_threshold) + y = "y", d = "d", z = "z", + n_folds = 5, + ml_g = learner_pars$ml_g$clone(), + ml_m = learner_pars$ml_m$clone(), + ml_r = learner_pars$ml_r$clone(), + dml_procedure = 
dml_procedure, score = score, + trimming_threshold = trimming_threshold) theta = iivm_hat$coef se = iivm_hat$se - + boot_theta = bootstrap_irmiv(iivm_hat$thetas, iivm_hat$ses, - data_iivm$df, - y = "y", d = "d", z = "z", - n_folds = 5, smpls = iivm_hat$smpls, - all_preds= iivm_hat$all_preds, - score = score, - bootstrap = "normal", n_rep_boot = n_rep_boot, - trimming_threshold = trimming_threshold)$boot_coef + data_iivm$df, + y = "y", d = "d", z = "z", + n_folds = 5, smpls = iivm_hat$smpls, + all_preds = iivm_hat$all_preds, + score = score, + bootstrap = "normal", n_rep_boot = n_rep_boot, + trimming_threshold = trimming_threshold)$boot_coef set.seed(3141) @@ -74,9 +74,9 @@ patrick::with_parameters_test_that("Unit tests for IIVM:", se_obj = double_mliivm_obj$se # bootstrap - double_mliivm_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mliivm_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = double_mliivm_obj$boot_coef - + expect_equal(theta, theta_obj, tolerance = 1e-8) expect_equal(se, se_obj, tolerance = 1e-8) expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) diff --git a/tests/testthat/test-double_ml_irm.R b/tests/testthat/test-double_ml_irm.R index 2c53649c..2c5ac653 100644 --- a/tests/testthat/test-double_ml_irm.R +++ b/tests/testthat/test-double_ml_irm.R @@ -36,14 +36,14 @@ patrick::with_parameters_test_that("Unit tests for IRM:", dml_procedure = dml_procedure, score = score) theta = irm_hat$coef se = irm_hat$se - + boot_theta = bootstrap_irm(irm_hat$thetas, irm_hat$ses, - data_irm$df, - y = "y", d = "d", - n_folds = 5, smpls = irm_hat$smpls, - all_preds= irm_hat$all_preds, - score = score, - bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef + data_irm$df, + y = "y", d = "d", + n_folds = 5, smpls = irm_hat$smpls, + all_preds = irm_hat$all_preds, + score = score, + bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef set.seed(3141) @@ -61,7 +61,7 @@ patrick::with_parameters_test_that("Unit tests for IRM:", se_obj = double_mlirm_obj$se # bootstrap - double_mlirm_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlirm_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = double_mlirm_obj$boot_coef # at the moment the object result comes without a name diff --git a/tests/testthat/test-double_ml_irm_loaded_mlr3learner.R b/tests/testthat/test-double_ml_irm_loaded_mlr3learner.R index 8803ce07..778ff6e6 100644 --- a/tests/testthat/test-double_ml_irm_loaded_mlr3learner.R +++ b/tests/testthat/test-double_ml_irm_loaded_mlr3learner.R @@ -73,7 +73,7 @@ patrick::with_parameters_test_that("Unit tests for IRM:", double_mlirm$fit() theta = double_mlirm$coef se = double_mlirm$se - double_mlirm$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlirm$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta = double_mlirm$boot_coef @@ -90,9 +90,9 @@ patrick::with_parameters_test_that("Unit tests for IRM:", double_mlirm_loaded$fit() theta_loaded = double_mlirm_loaded$coef se_loaded = double_mlirm_loaded$se - double_mlirm_loaded$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlirm_loaded$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_loaded = double_mlirm_loaded$boot_coef - + expect_equal(theta, theta_loaded, tolerance = 1e-8) expect_equal(se, se_loaded, tolerance = 1e-8) expect_equal(as.vector(boot_theta), as.vector(boot_theta_loaded), tolerance = 1e-8) diff --git a/tests/testthat/test-double_ml_irm_parameter_passing.R 
b/tests/testthat/test-double_ml_irm_parameter_passing.R index d214bb97..2d668875 100644 --- a/tests/testthat/test-double_ml_irm_parameter_passing.R +++ b/tests/testthat/test-double_ml_irm_parameter_passing.R @@ -34,35 +34,36 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IRM (oop n_rep_boot = 498 n_folds = 2 n_rep = 3 - + learner_pars = get_default_mlmethod_irm(learner) - + set.seed(3141) irm_hat = dml_irm(data_irm$df, - y = "y", d = "d", - n_folds = n_folds, - n_rep = n_rep, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), - params_g = learner_pars$params$params_g, - params_m = learner_pars$params$params_m, - dml_procedure = dml_procedure, score = score, - trimming_threshold = trimming_threshold) + y = "y", d = "d", + n_folds = n_folds, + n_rep = n_rep, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), + params_g = learner_pars$params$params_g, + params_m = learner_pars$params$params_m, + dml_procedure = dml_procedure, score = score, + trimming_threshold = trimming_threshold) theta = irm_hat$coef se = irm_hat$se - + boot_theta = bootstrap_irm(irm_hat$thetas, irm_hat$ses, - data_irm$df, - y = "y", d = "d", - n_folds = n_folds, n_rep = n_rep, - smpls = irm_hat$smpls, - all_preds= irm_hat$all_preds, - score = score, - bootstrap = "normal", n_rep_boot = n_rep_boot, - trimming_threshold = trimming_threshold)$boot_coef + data_irm$df, + y = "y", d = "d", + n_folds = n_folds, n_rep = n_rep, + smpls = irm_hat$smpls, + all_preds = irm_hat$all_preds, + score = score, + bootstrap = "normal", n_rep_boot = n_rep_boot, + trimming_threshold = trimming_threshold)$boot_coef set.seed(3141) - double_mlirm_obj = DoubleMLIRM$new(data = data_irm$dml_data, + double_mlirm_obj = DoubleMLIRM$new( + data = data_irm$dml_data, n_folds = n_folds, ml_g = lrn(learner_pars$mlmethod$mlmethod_g), ml_m = lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), @@ -90,7 +91,7 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IRM (oop se_obj = double_mlirm_obj$se # bootstrap - double_mlirm_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlirm_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = double_mlirm_obj$boot_coef expect_equal(theta, theta_obj, tolerance = 1e-8) @@ -115,28 +116,29 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IRM (no smpls = list(list(train_ids = train_ids, test_ids = test_ids)) irm_hat = dml_irm(data_irm$df, - y = "y", d = "d", - n_folds = 1, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), - params_g = learner_pars$params$params_g, - params_m = learner_pars$params$params_m, - dml_procedure = dml_procedure, score = score, - trimming_threshold = trimming_threshold, - smpls=smpls) + y = "y", d = "d", + n_folds = 1, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), + params_g = learner_pars$params$params_g, + params_m = learner_pars$params$params_m, + dml_procedure = dml_procedure, score = score, + trimming_threshold = trimming_threshold, + smpls = smpls) theta = irm_hat$coef se = irm_hat$se - + set.seed(3141) - dml_irm_nocf = DoubleMLIRM$new(data = data_irm$dml_data, - n_folds = n_folds, - ml_g = lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = 
lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), - dml_procedure = dml_procedure, - score = score, - trimming_threshold = trimming_threshold, - apply_cross_fitting = FALSE) - + dml_irm_nocf = DoubleMLIRM$new( + data = data_irm$dml_data, + n_folds = n_folds, + ml_g = lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), + dml_procedure = dml_procedure, + score = score, + trimming_threshold = trimming_threshold, + apply_cross_fitting = FALSE) + # set params for nuisance part m dml_irm_nocf$set_ml_nuisance_params( learner = "ml_m", @@ -151,11 +153,11 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IRM (no learner = "ml_g1", treat_var = "d", params = learner_pars$params$params_g) - + dml_irm_nocf$fit() theta_obj = dml_irm_nocf$coef se_obj = dml_irm_nocf$se - + expect_equal(theta, theta_obj, tolerance = 1e-8) expect_equal(se, se_obj, tolerance = 1e-8) } @@ -169,14 +171,15 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IRM (fol learner_pars = get_default_mlmethod_irm(learner) set.seed(3141) - double_mlirm_obj = DoubleMLIRM$new(data = data_irm$dml_data, - n_folds = n_folds, - ml_g = lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), - dml_procedure = dml_procedure, - score = score, - n_rep = n_rep, trimming_threshold = trimming_threshold) - + double_mlirm_obj = DoubleMLIRM$new( + data = data_irm$dml_data, + n_folds = n_folds, + ml_g = lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), + dml_procedure = dml_procedure, + score = score, + n_rep = n_rep, trimming_threshold = trimming_threshold) + # set params for nuisance part m double_mlirm_obj$set_ml_nuisance_params( learner = "ml_m", @@ -191,23 +194,24 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IRM (fol learner = "ml_g1", treat_var = "d", params = learner_pars$params$params_g) - + double_mlirm_obj$fit() theta = double_mlirm_obj$coef se = double_mlirm_obj$se - + params_g_fold_wise = rep(list(rep(list(learner_pars$params$params_g), n_folds)), n_rep) params_m_fold_wise = rep(list(rep(list(learner_pars$params$params_m), n_folds)), n_rep) - + set.seed(3141) - dml_irm_fold_wise = DoubleMLIRM$new(data = data_irm$dml_data, - n_folds = n_folds, - ml_g = lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), - dml_procedure = dml_procedure, - score = score, - n_rep = n_rep, trimming_threshold = trimming_threshold) - + dml_irm_fold_wise = DoubleMLIRM$new( + data = data_irm$dml_data, + n_folds = n_folds, + ml_g = lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = lrn(learner_pars$mlmethod$mlmethod_m, predict_type = "prob"), + dml_procedure = dml_procedure, + score = score, + n_rep = n_rep, trimming_threshold = trimming_threshold) + # set params for nuisance part m dml_irm_fold_wise$set_ml_nuisance_params( learner = "ml_m", @@ -225,11 +229,11 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IRM (fol treat_var = "d", params = params_g_fold_wise, set_fold_specific = TRUE) - + dml_irm_fold_wise$fit() theta_fold_wise = dml_irm_fold_wise$coef se_fold_wise = dml_irm_fold_wise$se - + expect_equal(theta, theta_fold_wise, tolerance = 1e-8) expect_equal(se, se_fold_wise, tolerance = 1e-8) } @@ -239,32 +243,34 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IRM (def .cases = test_cases, { n_folds = 
2 n_rep = 3 - + params_g = list(cp = 0.01, minsplit = 20) # this are defaults params_m = list(cp = 0.01, minsplit = 20) # this are defaults - + set.seed(3141) - dml_irm_default = DoubleMLIRM$new(data = data_irm$dml_data, - n_folds = n_folds, - ml_g = lrn('regr.rpart'), - ml_m = lrn('classif.rpart', predict_type = "prob"), - dml_procedure = dml_procedure, - score = score, - n_rep = n_rep, trimming_threshold = trimming_threshold) - + dml_irm_default = DoubleMLIRM$new( + data = data_irm$dml_data, + n_folds = n_folds, + ml_g = lrn("regr.rpart"), + ml_m = lrn("classif.rpart", predict_type = "prob"), + dml_procedure = dml_procedure, + score = score, + n_rep = n_rep, trimming_threshold = trimming_threshold) + dml_irm_default$fit() theta_default = dml_irm_default$coef se_default = dml_irm_default$se - + set.seed(3141) - double_mlirm_obj = DoubleMLIRM$new(data = data_irm$dml_data, - n_folds = n_folds, - ml_g = lrn('regr.rpart'), - ml_m = lrn('classif.rpart', predict_type = "prob"), - dml_procedure = dml_procedure, - score = score, - n_rep = n_rep, trimming_threshold = trimming_threshold) - + double_mlirm_obj = DoubleMLIRM$new( + data = data_irm$dml_data, + n_folds = n_folds, + ml_g = lrn("regr.rpart"), + ml_m = lrn("classif.rpart", predict_type = "prob"), + dml_procedure = dml_procedure, + score = score, + n_rep = n_rep, trimming_threshold = trimming_threshold) + # set params for nuisance part m double_mlirm_obj$set_ml_nuisance_params( learner = "ml_m", @@ -279,11 +285,11 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of IRM (def learner = "ml_g1", treat_var = "d", params = params_g) - + double_mlirm_obj$fit() theta = double_mlirm_obj$coef se = double_mlirm_obj$se - + expect_equal(theta, theta_default, tolerance = 1e-8) expect_equal(se, se_default, tolerance = 1e-8) } diff --git a/tests/testthat/test-double_ml_irm_trim.R b/tests/testthat/test-double_ml_irm_trim.R index cd085ed3..cb59fa80 100644 --- a/tests/testthat/test-double_ml_irm_trim.R +++ b/tests/testthat/test-double_ml_irm_trim.R @@ -29,23 +29,23 @@ patrick::with_parameters_test_that("Unit tests for IRM:", set.seed(3141) irm_hat = dml_irm(data_irm$df, - y = "y", d = "d", - n_folds = 5, - ml_g = learner_pars$ml_g$clone(), - ml_m = learner_pars$ml_m$clone(), - dml_procedure = dml_procedure, score = score, - trimming_threshold = trimming_threshold) + y = "y", d = "d", + n_folds = 5, + ml_g = learner_pars$ml_g$clone(), + ml_m = learner_pars$ml_m$clone(), + dml_procedure = dml_procedure, score = score, + trimming_threshold = trimming_threshold) theta = irm_hat$coef se = irm_hat$se - + boot_theta = bootstrap_irm(irm_hat$thetas, irm_hat$ses, - data_irm$df, - y = "y", d = "d", - n_folds = 5, smpls = irm_hat$smpls, - all_preds= irm_hat$all_preds, - score = score, - bootstrap = "normal", n_rep_boot = n_rep_boot, - trimming_threshold = trimming_threshold)$boot_coef + data_irm$df, + y = "y", d = "d", + n_folds = 5, smpls = irm_hat$smpls, + all_preds = irm_hat$all_preds, + score = score, + bootstrap = "normal", n_rep_boot = n_rep_boot, + trimming_threshold = trimming_threshold)$boot_coef set.seed(3141) double_mlirm_obj = DoubleMLIRM$new( @@ -63,7 +63,7 @@ patrick::with_parameters_test_that("Unit tests for IRM:", se_obj = double_mlirm_obj$se # bootstrap - double_mlirm_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlirm_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = double_mlirm_obj$boot_coef expect_equal(theta, theta_obj, tolerance = 1e-8) diff --git 
a/tests/testthat/test-double_ml_irm_user_score.R b/tests/testthat/test-double_ml_irm_user_score.R index 6cefe133..d86acab2 100644 --- a/tests/testthat/test-double_ml_irm_user_score.R +++ b/tests/testthat/test-double_ml_irm_user_score.R @@ -54,7 +54,7 @@ patrick::with_parameters_test_that("Unit tests for IRM, callable score:", double_mlirm_obj$fit() theta_obj = double_mlirm_obj$coef se_obj = double_mlirm_obj$se - double_mlirm_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlirm_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = double_mlirm_obj$boot_coef set.seed(3141) @@ -70,7 +70,7 @@ patrick::with_parameters_test_that("Unit tests for IRM, callable score:", theta_obj_score = double_mlirm_obj_score$coef se_obj_score = double_mlirm_obj_score$se - double_mlirm_obj_score$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlirm_obj_score$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_score = double_mlirm_obj_score$boot_coef expect_equal(theta_obj_score, theta_obj, tolerance = 1e-8) diff --git a/tests/testthat/test-double_ml_pliv.R b/tests/testthat/test-double_ml_pliv.R index 81d483b3..3d9e8652 100644 --- a/tests/testthat/test-double_ml_pliv.R +++ b/tests/testthat/test-double_ml_pliv.R @@ -35,13 +35,13 @@ patrick::with_parameters_test_that("Unit tests for PLIV:", dml_procedure = dml_procedure, score = score) theta = pliv_hat$coef se = pliv_hat$se - + boot_theta = bootstrap_pliv(pliv_hat$thetas, pliv_hat$ses, - data_pliv$df, - y = "y", d = "d", z = "z", - n_folds = 5, smpls = pliv_hat$smpls, - all_preds= pliv_hat$all_preds, - bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef + data_pliv$df, + y = "y", d = "d", z = "z", + n_folds = 5, smpls = pliv_hat$smpls, + all_preds = pliv_hat$all_preds, + bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef set.seed(3141) double_mlpliv_obj = DoubleMLPLIV$new( @@ -58,7 +58,7 @@ patrick::with_parameters_test_that("Unit tests for PLIV:", se_obj = double_mlpliv_obj$se # bootstrap - double_mlpliv_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlpliv_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = double_mlpliv_obj$boot_coef # at the moment the object result comes without a name diff --git a/tests/testthat/test-double_ml_pliv_multi_z_parameter_passing.R b/tests/testthat/test-double_ml_pliv_multi_z_parameter_passing.R index a9ee9e4a..83d7d1c0 100644 --- a/tests/testthat/test-double_ml_pliv_multi_z_parameter_passing.R +++ b/tests/testthat/test-double_ml_pliv_multi_z_parameter_passing.R @@ -26,39 +26,39 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par n_rep_boot = 498 n_folds = 2 n_rep = 3 - + learner_pars = get_default_mlmethod_pliv(learner) df = data_pliv$df - + set.seed(3141) pliv_hat = dml_pliv_partial_x(df, - y = "y", d = "d", z = c("z", "z2"), - n_folds = n_folds, n_rep = n_rep, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), - params_g = learner_pars$params$params_g, - params_m = learner_pars$params$params_m, - params_r = learner_pars$params$params_r, - dml_procedure = dml_procedure, score = score) + y = "y", d = "d", z = c("z", "z2"), + n_folds = n_folds, n_rep = n_rep, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), + params_g = learner_pars$params$params_g, + params_m = learner_pars$params$params_m, 
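+    # params_m is passed once here; to reproduce it, the OOP interface below
+    # has to set the same values separately for "ml_m_z" and "ml_m_z2", one
+    # m-learner per instrument in c("z", "z2").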
+ params_r = learner_pars$params$params_r, + dml_procedure = dml_procedure, score = score) theta = pliv_hat$coef se = pliv_hat$se - + set.seed(3141) boot_theta = bootstrap_pliv_partial_x(pliv_hat$thetas, pliv_hat$ses, - df, - y = "y", d = "d", z = c("z", "z2"), - n_folds = n_folds, n_rep = n_rep, - smpls = pliv_hat$smpls, - all_preds= pliv_hat$all_preds, - bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef - + df, + y = "y", d = "d", z = c("z", "z2"), + n_folds = n_folds, n_rep = n_rep, + smpls = pliv_hat$smpls, + all_preds = pliv_hat$all_preds, + bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef + set.seed(3141) Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] - + dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) + y_col = "y", + d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) dml_pliv_obj = DoubleMLPLIV.partialX( data = dml_data, n_folds = n_folds, n_rep = n_rep, @@ -67,33 +67,37 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), dml_procedure = dml_procedure, score = score) - - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_g", - params = learner_pars$params$params_g) - dml_pliv_obj$set_ml_nuisance_params(learner = "ml_m_z", - treat_var = "d", - params = learner_pars$params$params_m) - dml_pliv_obj$set_ml_nuisance_params(learner = "ml_m_z2", - treat_var = "d", - params = learner_pars$params$params_m) - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = learner_pars$params$params_r) - + + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_g", + params = learner_pars$params$params_g) + dml_pliv_obj$set_ml_nuisance_params( + learner = "ml_m_z", + treat_var = "d", + params = learner_pars$params$params_m) + dml_pliv_obj$set_ml_nuisance_params( + learner = "ml_m_z2", + treat_var = "d", + params = learner_pars$params$params_m) + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = learner_pars$params$params_r) + dml_pliv_obj$fit() - + theta_obj = dml_pliv_obj$coef se_obj = dml_pliv_obj$se - + # bootstrap set.seed(3141) - dml_pliv_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + dml_pliv_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = dml_pliv_obj$boot_coef - + expect_equal(theta, theta_obj, tolerance = 1e-8) expect_equal(se, se_obj, tolerance = 1e-8) - + expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) } ) @@ -102,75 +106,83 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par .cases = test_cases, { n_folds = 2 n_rep = 3 - + learner_pars = get_default_mlmethod_pliv(learner) - + df = data_pliv$df Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) - + y_col = "y", + d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) + set.seed(3141) dml_pliv_obj = DoubleMLPLIV.partialX(dml_data, - n_folds = n_folds, n_rep = n_rep, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), - dml_procedure = dml_procedure, - score = score) - - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_g", - params = learner_pars$params$params_g) - 
dml_pliv_obj$set_ml_nuisance_params(learner = "ml_m_z", - treat_var = "d", - params = learner_pars$params$params_m) - dml_pliv_obj$set_ml_nuisance_params(learner = "ml_m_z2", - treat_var = "d", - params = learner_pars$params$params_m) - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = learner_pars$params$params_r) - + n_folds = n_folds, n_rep = n_rep, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), + dml_procedure = dml_procedure, + score = score) + + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_g", + params = learner_pars$params$params_g) + dml_pliv_obj$set_ml_nuisance_params( + learner = "ml_m_z", + treat_var = "d", + params = learner_pars$params$params_m) + dml_pliv_obj$set_ml_nuisance_params( + learner = "ml_m_z2", + treat_var = "d", + params = learner_pars$params$params_m) + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = learner_pars$params$params_r) + dml_pliv_obj$fit() theta = dml_pliv_obj$coef se = dml_pliv_obj$se - + params_g_fold_wise = rep(list(rep(list(learner_pars$params$params_g), n_folds)), n_rep) params_m_fold_wise = rep(list(rep(list(learner_pars$params$params_m), n_folds)), n_rep) params_r_fold_wise = rep(list(rep(list(learner_pars$params$params_r), n_folds)), n_rep) - + set.seed(3141) dml_pliv_obj_fold_wise = DoubleMLPLIV.partialX(dml_data, - n_folds = n_folds, n_rep = n_rep, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), - dml_procedure = dml_procedure, - score = score) - - dml_pliv_obj_fold_wise$set_ml_nuisance_params(treat_var = "d", - learner = "ml_g", - params = params_g_fold_wise, - set_fold_specific = TRUE) - dml_pliv_obj_fold_wise$set_ml_nuisance_params(treat_var = "d", - learner = "ml_m_z", - params = params_m_fold_wise, - set_fold_specific = TRUE) - dml_pliv_obj_fold_wise$set_ml_nuisance_params(treat_var = "d", - learner = "ml_m_z2", - params = params_m_fold_wise, - set_fold_specific = TRUE) - dml_pliv_obj_fold_wise$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = params_r_fold_wise, - set_fold_specific = TRUE) - + n_folds = n_folds, n_rep = n_rep, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), + dml_procedure = dml_procedure, + score = score) + + dml_pliv_obj_fold_wise$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_g", + params = params_g_fold_wise, + set_fold_specific = TRUE) + dml_pliv_obj_fold_wise$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_m_z", + params = params_m_fold_wise, + set_fold_specific = TRUE) + dml_pliv_obj_fold_wise$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_m_z2", + params = params_m_fold_wise, + set_fold_specific = TRUE) + dml_pliv_obj_fold_wise$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = params_r_fold_wise, + set_fold_specific = TRUE) + dml_pliv_obj_fold_wise$fit() theta_fold_wise = dml_pliv_obj_fold_wise$coef se_fold_wise = dml_pliv_obj_fold_wise$se - + expect_equal(theta, theta_fold_wise, tolerance = 1e-8) expect_equal(se, se_fold_wise, tolerance = 1e-8) } @@ -180,56 +192,60 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par .cases = test_cases, { n_folds = 2 n_rep = 3 - + 
params_g = list(cp = 0.01, minsplit = 20) # this are defaults params_m = list(cp = 0.01, minsplit = 20) # this are defaults params_r = list(cp = 0.01, minsplit = 20) # this are defaults - + df = data_pliv$df Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) - + y_col = "y", + d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) + set.seed(3141) dml_pliv_default = DoubleMLPLIV.partialX(dml_data, - n_folds = n_folds, n_rep = n_rep, - ml_g = lrn('regr.rpart'), - ml_m = lrn('regr.rpart'), - ml_r = lrn('regr.rpart'), - dml_procedure = dml_procedure, - score = score) - + n_folds = n_folds, n_rep = n_rep, + ml_g = lrn("regr.rpart"), + ml_m = lrn("regr.rpart"), + ml_r = lrn("regr.rpart"), + dml_procedure = dml_procedure, + score = score) + dml_pliv_default$fit() theta_default = dml_pliv_default$coef se_default = dml_pliv_default$se - + set.seed(3141) dml_pliv_obj = DoubleMLPLIV.partialX(dml_data, - n_folds = n_folds, n_rep = n_rep, - ml_g = lrn('regr.rpart'), - ml_m = lrn('regr.rpart'), - ml_r = lrn('regr.rpart'), - dml_procedure = dml_procedure, - score = score) - - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_g", - params = params_g) - dml_pliv_obj$set_ml_nuisance_params(learner = "ml_m_z", - treat_var = "d", - params = params_m) - dml_pliv_obj$set_ml_nuisance_params(learner = "ml_m_z2", - treat_var = "d", - params = params_m) - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = params_r) - + n_folds = n_folds, n_rep = n_rep, + ml_g = lrn("regr.rpart"), + ml_m = lrn("regr.rpart"), + ml_r = lrn("regr.rpart"), + dml_procedure = dml_procedure, + score = score) + + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_g", + params = params_g) + dml_pliv_obj$set_ml_nuisance_params( + learner = "ml_m_z", + treat_var = "d", + params = params_m) + dml_pliv_obj$set_ml_nuisance_params( + learner = "ml_m_z2", + treat_var = "d", + params = params_m) + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = params_r) + dml_pliv_obj$fit() theta = dml_pliv_obj$coef se = dml_pliv_obj$se - + expect_equal(theta, theta_default, tolerance = 1e-8) expect_equal(se, se_default, tolerance = 1e-8) } diff --git a/tests/testthat/test-double_ml_pliv_parameter_passing.R b/tests/testthat/test-double_ml_pliv_parameter_passing.R index 0e64b384..8c57978a 100644 --- a/tests/testthat/test-double_ml_pliv_parameter_passing.R +++ b/tests/testthat/test-double_ml_pliv_parameter_passing.R @@ -31,30 +31,30 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV (oo n_rep_boot = 498 n_folds = 2 n_rep = 3 - + learner_pars = get_default_mlmethod_pliv(learner) - + set.seed(3141) pliv_hat = dml_pliv(data_pliv$df, - y = "y", d = "d", z = "z", - n_folds = n_folds, n_rep = n_rep, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), - params_g = learner_pars$params$params_g, - params_m = learner_pars$params$params_m, - params_r = learner_pars$params$params_r, - dml_procedure = dml_procedure, score = score) + y = "y", d = "d", z = "z", + n_folds = n_folds, n_rep = n_rep, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), + params_g = learner_pars$params$params_g, + 
params_m = learner_pars$params$params_m, + params_r = learner_pars$params$params_r, + dml_procedure = dml_procedure, score = score) theta = pliv_hat$coef se = pliv_hat$se - + boot_theta = bootstrap_pliv(pliv_hat$thetas, pliv_hat$ses, - data_pliv$df, - y = "y", d = "d", z = "z", - n_folds = n_folds, n_rep = n_rep, - smpls = pliv_hat$smpls, - all_preds= pliv_hat$all_preds, - bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef + data_pliv$df, + y = "y", d = "d", z = "z", + n_folds = n_folds, n_rep = n_rep, + smpls = pliv_hat$smpls, + all_preds = pliv_hat$all_preds, + bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef set.seed(3141) dml_pliv_obj = DoubleMLPLIV$new( @@ -65,29 +65,32 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV (oo ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), dml_procedure = dml_procedure, score = score) - - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_g", - params = learner_pars$params$params_g) - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_m", - params = learner_pars$params$params_m) - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = learner_pars$params$params_r) - + + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_g", + params = learner_pars$params$params_g) + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_m", + params = learner_pars$params$params_m) + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = learner_pars$params$params_r) + dml_pliv_obj$fit() theta_obj = dml_pliv_obj$coef se_obj = dml_pliv_obj$se - + # bootstrap - dml_pliv_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + dml_pliv_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = dml_pliv_obj$boot_coef - + expect_equal(theta, theta_obj, tolerance = 1e-8) expect_equal(se, se_obj, tolerance = 1e-8) - + expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) } ) @@ -95,9 +98,9 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV (oo patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV (no cross-fitting)", .cases = test_cases_nocf, { n_folds = 2 - + learner_pars = get_default_mlmethod_pliv(learner) - + # Passing for non-cross-fitting case set.seed(3141) my_task = Task$new("help task", "regr", data_pliv$df) @@ -105,45 +108,49 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV (no train_ids = list(my_sampling$train_set(1)) test_ids = list(my_sampling$test_set(1)) smpls = list(list(train_ids = train_ids, test_ids = test_ids)) - + pliv_hat = dml_pliv(data_pliv$df, - y = "y", d = "d", z = "z", - n_folds = 1, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), - params_g = learner_pars$params$params_g, - params_m = learner_pars$params$params_m, - params_r = learner_pars$params$params_r, - dml_procedure = dml_procedure, score = score, - smpls=smpls) + y = "y", d = "d", z = "z", + n_folds = 1, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), + params_g = learner_pars$params$params_g, + params_m = learner_pars$params$params_m, + params_r = learner_pars$params$params_r, + dml_procedure = dml_procedure, score = score, + smpls = smpls) theta = pliv_hat$coef se = 
pliv_hat$se - + set.seed(3141) - dml_pliv_nocf = DoubleMLPLIV$new(data = data_pliv$dml_data, - n_folds = n_folds, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), - dml_procedure = dml_procedure, - score = score, - apply_cross_fitting = FALSE) - - dml_pliv_nocf$set_ml_nuisance_params(treat_var = "d", - learner = "ml_g", - params = learner_pars$params$params_g) - dml_pliv_nocf$set_ml_nuisance_params(treat_var = "d", - learner = "ml_m", - params = learner_pars$params$params_m) - dml_pliv_nocf$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = learner_pars$params$params_r) - + dml_pliv_nocf = DoubleMLPLIV$new( + data = data_pliv$dml_data, + n_folds = n_folds, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), + dml_procedure = dml_procedure, + score = score, + apply_cross_fitting = FALSE) + + dml_pliv_nocf$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_g", + params = learner_pars$params$params_g) + dml_pliv_nocf$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_m", + params = learner_pars$params$params_m) + dml_pliv_nocf$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = learner_pars$params$params_r) + dml_pliv_nocf$fit() theta_obj = dml_pliv_nocf$coef se_obj = dml_pliv_nocf$se - + expect_equal(theta, theta_obj, tolerance = 1e-8) expect_equal(se, se_obj, tolerance = 1e-8) } @@ -153,62 +160,68 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV (fo .cases = test_cases, { n_folds = 2 n_rep = 3 - + learner_pars = get_default_mlmethod_pliv(learner) - + set.seed(3141) dml_pliv_obj = DoubleMLPLIV$new(data_pliv$dml_data, - n_folds = n_folds, n_rep = n_rep, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), - dml_procedure = dml_procedure, - score = score) - - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_g", - params = learner_pars$params$params_g) - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_m", - params = learner_pars$params$params_m) - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = learner_pars$params$params_r) - + n_folds = n_folds, n_rep = n_rep, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), + dml_procedure = dml_procedure, + score = score) + + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_g", + params = learner_pars$params$params_g) + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_m", + params = learner_pars$params$params_m) + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = learner_pars$params$params_r) + dml_pliv_obj$fit() theta = dml_pliv_obj$coef se = dml_pliv_obj$se - + params_g_fold_wise = rep(list(rep(list(learner_pars$params$params_g), n_folds)), n_rep) params_m_fold_wise = rep(list(rep(list(learner_pars$params$params_m), n_folds)), n_rep) params_r_fold_wise = rep(list(rep(list(learner_pars$params$params_r), n_folds)), n_rep) - + set.seed(3141) dml_pliv_obj_fold_wise = DoubleMLPLIV$new(data_pliv$dml_data, - n_folds = n_folds, n_rep = n_rep, - ml_g = 
mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), - dml_procedure = dml_procedure, - score = score) - - dml_pliv_obj_fold_wise$set_ml_nuisance_params(treat_var = "d", - learner = "ml_g", - params = params_g_fold_wise, - set_fold_specific = TRUE) - dml_pliv_obj_fold_wise$set_ml_nuisance_params(treat_var = "d", - learner = "ml_m", - params = params_m_fold_wise, - set_fold_specific = TRUE) - dml_pliv_obj_fold_wise$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = params_r_fold_wise, - set_fold_specific = TRUE) + n_folds = n_folds, n_rep = n_rep, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), + dml_procedure = dml_procedure, + score = score) + + dml_pliv_obj_fold_wise$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_g", + params = params_g_fold_wise, + set_fold_specific = TRUE) + dml_pliv_obj_fold_wise$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_m", + params = params_m_fold_wise, + set_fold_specific = TRUE) + dml_pliv_obj_fold_wise$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = params_r_fold_wise, + set_fold_specific = TRUE) dml_pliv_obj_fold_wise$fit() theta_fold_wise = dml_pliv_obj_fold_wise$coef se_fold_wise = dml_pliv_obj_fold_wise$se - + expect_equal(theta, theta_fold_wise, tolerance = 1e-8) expect_equal(se, se_fold_wise, tolerance = 1e-8) } @@ -218,47 +231,51 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV (de .cases = test_cases, { n_folds = 2 n_rep = 3 - + params_g = list(cp = 0.01, minsplit = 20) # this are defaults params_m = list(cp = 0.01, minsplit = 20) # this are defaults params_r = list(cp = 0.01, minsplit = 20) # this are defaults - + set.seed(3141) dml_pliv_default = DoubleMLPLIV$new(data_pliv$dml_data, - n_folds = n_folds, n_rep = n_rep, - ml_g = lrn('regr.rpart'), - ml_m = lrn('regr.rpart'), - ml_r = lrn('regr.rpart'), - dml_procedure = dml_procedure, - score = score) - + n_folds = n_folds, n_rep = n_rep, + ml_g = lrn("regr.rpart"), + ml_m = lrn("regr.rpart"), + ml_r = lrn("regr.rpart"), + dml_procedure = dml_procedure, + score = score) + dml_pliv_default$fit() theta_default = dml_pliv_default$coef se_default = dml_pliv_default$se - + set.seed(3141) - dml_pliv_obj = DoubleMLPLIV$new(data = data_pliv$dml_data, - n_folds = n_folds, n_rep = n_rep, - ml_g = lrn('regr.rpart'), - ml_m = lrn('regr.rpart'), - ml_r = lrn('regr.rpart'), - dml_procedure = dml_procedure, - score = score) - - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_g", - params = params_g) - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_m", - params = params_m) - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = params_r) - + dml_pliv_obj = DoubleMLPLIV$new( + data = data_pliv$dml_data, + n_folds = n_folds, n_rep = n_rep, + ml_g = lrn("regr.rpart"), + ml_m = lrn("regr.rpart"), + ml_r = lrn("regr.rpart"), + dml_procedure = dml_procedure, + score = score) + + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_g", + params = params_g) + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_m", + params = params_m) + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = params_r) + dml_pliv_obj$fit() theta = dml_pliv_obj$coef se = dml_pliv_obj$se - + 
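# The equalities below rest on re-seeding: both objects are constructed after
# an identical set.seed(3141) call, so they draw the same cross-fitting
# splits and their coefficients must agree up to numerical precision. A
# minimal sketch of that premise:
#   set.seed(3141); a = sample(10)
#   set.seed(3141); b = sample(10)
#   identical(a, b)  # TRUE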
expect_equal(theta, theta_default, tolerance = 1e-8) expect_equal(se, se_default, tolerance = 1e-8) } diff --git a/tests/testthat/test-double_ml_pliv_partial_functional_initializer.R b/tests/testthat/test-double_ml_pliv_partial_functional_initializer.R index cc4e3364..f9e9d35c 100644 --- a/tests/testthat/test-double_ml_pliv_partial_functional_initializer.R +++ b/tests/testthat/test-double_ml_pliv_partial_functional_initializer.R @@ -54,7 +54,7 @@ patrick::with_parameters_test_that("Unit tests for PLIV (partialX functional ini double_mlpliv_partX$fit() theta_partX = double_mlpliv_partX$coef se_partX = double_mlpliv_partX$se - + expect_equal(theta_partX, theta_obj, tolerance = 1e-8) expect_equal(se_partX, se_obj, tolerance = 1e-8) } @@ -66,18 +66,18 @@ patrick::with_parameters_test_that("Unit tests for PLIV (partialZ functional ini df = data_pliv$df Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] data_ml = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) + y_col = "y", + d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) # Partial out Z set.seed(3141) double_mlpliv_partZ = DoubleMLPLIV$new(data_ml, - n_folds = 5, - ml_g = NULL, - ml_m = NULL, - ml_r = learner_pars$ml_r$clone(), - dml_procedure = dml_procedure, - score = score, - partialX = FALSE, partialZ = TRUE) + n_folds = 5, + ml_g = NULL, + ml_m = NULL, + ml_r = learner_pars$ml_r$clone(), + dml_procedure = dml_procedure, + score = score, + partialX = FALSE, partialZ = TRUE) double_mlpliv_partZ$fit() theta_partZ = double_mlpliv_partZ$coef @@ -93,7 +93,7 @@ patrick::with_parameters_test_that("Unit tests for PLIV (partialZ functional ini double_mlpliv_partZ_fun$fit() theta_partZ_fun = double_mlpliv_partZ_fun$coef se_partZ_fun = double_mlpliv_partZ_fun$se - + expect_equal(theta_partZ, theta_partZ_fun, tolerance = 1e-8) expect_equal(se_partZ, se_partZ_fun, tolerance = 1e-8) } @@ -105,23 +105,23 @@ patrick::with_parameters_test_that("Unit tests for PLIV (partialXZ functional in df = data_pliv$df Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] data_ml = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) + y_col = "y", + d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) set.seed(3141) double_mlpliv_partXZ = DoubleMLPLIV$new(data_ml, - n_folds = 5, - ml_g = learner_pars$ml_g$clone(), - ml_m = learner_pars$ml_m$clone(), - ml_r = learner_pars$ml_r$clone(), - dml_procedure = dml_procedure, - score = score, - partialX = TRUE, partialZ = TRUE) - + n_folds = 5, + ml_g = learner_pars$ml_g$clone(), + ml_m = learner_pars$ml_m$clone(), + ml_r = learner_pars$ml_r$clone(), + dml_procedure = dml_procedure, + score = score, + partialX = TRUE, partialZ = TRUE) + double_mlpliv_partXZ$fit() theta_partXZ = double_mlpliv_partXZ$coef se_partXZ = double_mlpliv_partXZ$se - + set.seed(3141) double_mlpliv_partXZ_fun = DoubleMLPLIV.partialXZ(data_ml, n_folds = 5, diff --git a/tests/testthat/test-double_ml_pliv_partial_x.R b/tests/testthat/test-double_ml_pliv_partial_x.R index 2c30d16b..fee0d8f2 100644 --- a/tests/testthat/test-double_ml_pliv_partial_x.R +++ b/tests/testthat/test-double_ml_pliv_partial_x.R @@ -27,23 +27,23 @@ patrick::with_parameters_test_that("Unit tests for PLIV.partialX:", dml_procedure = dml_procedure, score = score) theta = pliv_hat$coef se = pliv_hat$se - + set.seed(3141) boot_theta = bootstrap_pliv_partial_x(pliv_hat$thetas, pliv_hat$ses, - data_pliv_partialX$df, - y = "y", d = "d", z = 
paste0("Z", 1:dim_z), - n_folds = 5, smpls = pliv_hat$smpls, - all_preds= pliv_hat$all_preds, - bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef + data_pliv_partialX$df, + y = "y", d = "d", z = paste0("Z", 1:dim_z), + n_folds = 5, smpls = pliv_hat$smpls, + all_preds = pliv_hat$all_preds, + bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef set.seed(3141) double_mlpliv_obj = DoubleMLPLIV.partialX(data_pliv_partialX$dml_data, - ml_g = learner_pars$ml_g$clone(), - ml_m = learner_pars$ml_m$clone(), - ml_r = learner_pars$ml_r$clone(), - n_folds = 5, - score = score, - dml_procedure = dml_procedure) + ml_g = learner_pars$ml_g$clone(), + ml_m = learner_pars$ml_m$clone(), + ml_r = learner_pars$ml_r$clone(), + n_folds = 5, + score = score, + dml_procedure = dml_procedure) double_mlpliv_obj$fit() theta_obj = double_mlpliv_obj$coef @@ -51,7 +51,7 @@ patrick::with_parameters_test_that("Unit tests for PLIV.partialX:", # bootstrap set.seed(3141) - double_mlpliv_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlpliv_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = double_mlpliv_obj$boot_coef # at the moment the object result comes without a name @@ -62,15 +62,18 @@ patrick::with_parameters_test_that("Unit tests for PLIV.partialX:", ) test_that("Unit tests for PLIV.partialX invalid score", { - msg = paste("Callable score not implemented for DoubleMLPLIV with", - "partialX=TRUE and partialZ=FALSE with several instruments.") + msg = paste( + "Callable score not implemented for DoubleMLPLIV with", + "partialX=TRUE and partialZ=FALSE with several instruments.") double_mlplr_obj <- DoubleMLPLIV.partialX( data_pliv_partialX$dml_data, - ml_g = mlr3::lrn('regr.rpart'), - ml_m = mlr3::lrn('regr.rpart'), - ml_r = mlr3::lrn('regr.rpart'), - score = function(x) return(mean(x))) + ml_g = mlr3::lrn("regr.rpart"), + ml_m = mlr3::lrn("regr.rpart"), + ml_r = mlr3::lrn("regr.rpart"), + score = function(x) { + return(mean(x)) + }) expect_error(double_mlplr_obj$fit(), - regexp = msg) - } + regexp = msg) +} ) diff --git a/tests/testthat/test-double_ml_pliv_partial_xz.R b/tests/testthat/test-double_ml_pliv_partial_xz.R index 192fdf71..837a8882 100644 --- a/tests/testthat/test-double_ml_pliv_partial_xz.R +++ b/tests/testthat/test-double_ml_pliv_partial_xz.R @@ -31,27 +31,27 @@ patrick::with_parameters_test_that("Unit tests for PLIV.partialXZ:", se = pliv_hat$se boot_theta = bootstrap_pliv_partial_xz(pliv_hat$thetas, pliv_hat$ses, - data_pliv_partialXZ$df, - y = "y", d = "d", z = paste0("Z", 1:dim_z), - n_folds = 5, smpls = pliv_hat$smpls, - all_preds= pliv_hat$all_preds, - bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef + data_pliv_partialXZ$df, + y = "y", d = "d", z = paste0("Z", 1:dim_z), + n_folds = 5, smpls = pliv_hat$smpls, + all_preds = pliv_hat$all_preds, + bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef set.seed(3141) double_mlpliv_obj = DoubleMLPLIV.partialXZ(data_pliv_partialXZ$dml_data, - ml_g = learner_pars$ml_g$clone(), - ml_m = learner_pars$ml_m$clone(), - ml_r = learner_pars$ml_r$clone(), - n_folds = 5, - score = score, - dml_procedure = dml_procedure) + ml_g = learner_pars$ml_g$clone(), + ml_m = learner_pars$ml_m$clone(), + ml_r = learner_pars$ml_r$clone(), + n_folds = 5, + score = score, + dml_procedure = dml_procedure) - double_mlpliv_obj$fit(store_predictions=TRUE) + double_mlpliv_obj$fit(store_predictions = TRUE) theta_obj = double_mlpliv_obj$coef se_obj = double_mlpliv_obj$se # bootstrap - double_mlpliv_obj$bootstrap(method = 'normal', n_rep 
= n_rep_boot) + double_mlpliv_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = double_mlpliv_obj$boot_coef # at the moment the object result comes without a name @@ -62,16 +62,18 @@ patrick::with_parameters_test_that("Unit tests for PLIV.partialXZ:", ) test_that("Unit tests for PLIV.partialXZ invalid score", { - msg = paste("Callable score not implemented for DoubleMLPLIV with", - "partialX=TRUE and partialZ=TRUE.") + msg = paste( + "Callable score not implemented for DoubleMLPLIV with", + "partialX=TRUE and partialZ=TRUE.") double_mlplr_obj <- DoubleMLPLIV.partialXZ( data_pliv_partialXZ$dml_data, - ml_g = mlr3::lrn('regr.rpart'), - ml_m = mlr3::lrn('regr.rpart'), - ml_r = mlr3::lrn('regr.rpart'), - score = function(x) return(mean(x))) + ml_g = mlr3::lrn("regr.rpart"), + ml_m = mlr3::lrn("regr.rpart"), + ml_r = mlr3::lrn("regr.rpart"), + score = function(x) { + return(mean(x)) + }) expect_error(double_mlplr_obj$fit(), - regexp = msg) + regexp = msg) } ) - diff --git a/tests/testthat/test-double_ml_pliv_partial_xz_parameter_passing.R b/tests/testthat/test-double_ml_pliv_partial_xz_parameter_passing.R index f7f3aca2..efb4dfa4 100644 --- a/tests/testthat/test-double_ml_pliv_partial_xz_parameter_passing.R +++ b/tests/testthat/test-double_ml_pliv_partial_xz_parameter_passing.R @@ -24,38 +24,38 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par n_rep_boot = 498 n_folds = 2 n_rep = 3 - + learner_pars = get_default_mlmethod_pliv(learner) df = data_pliv$df - + set.seed(3141) pliv_hat = dml_pliv_partial_xz(df, - y = "y", d = "d", z = c("z", "z2"), - n_folds = n_folds, n_rep = n_rep, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), - params_g = learner_pars$params$params_g, - params_m = learner_pars$params$params_m, - params_r = learner_pars$params$params_r, - dml_procedure = dml_procedure, score = score) + y = "y", d = "d", z = c("z", "z2"), + n_folds = n_folds, n_rep = n_rep, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), + params_g = learner_pars$params$params_g, + params_m = learner_pars$params$params_m, + params_r = learner_pars$params$params_r, + dml_procedure = dml_procedure, score = score) theta = pliv_hat$coef se = pliv_hat$se - + boot_theta = bootstrap_pliv_partial_xz(pliv_hat$thetas, pliv_hat$ses, - df, - y = "y", d = "d", z = c("z", "z2"), - n_folds = n_folds, n_rep = n_rep, - smpls = pliv_hat$smpls, - all_preds= pliv_hat$all_preds, - bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef - + df, + y = "y", d = "d", z = c("z", "z2"), + n_folds = n_folds, n_rep = n_rep, + smpls = pliv_hat$smpls, + all_preds = pliv_hat$all_preds, + bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef + set.seed(3141) Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] - + dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) + y_col = "y", + d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) dml_pliv_obj = DoubleMLPLIV.partialXZ( data = dml_data, n_folds = n_folds, n_rep = n_rep, @@ -64,29 +64,32 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), dml_procedure = dml_procedure, score = score) - - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = 
"ml_g", - params = learner_pars$params$params_g) - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_m", - params = learner_pars$params$params_m) - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = learner_pars$params$params_r) - + + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_g", + params = learner_pars$params$params_g) + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_m", + params = learner_pars$params$params_m) + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = learner_pars$params$params_r) + dml_pliv_obj$fit() - + theta_obj = dml_pliv_obj$coef se_obj = dml_pliv_obj$se - + # bootstrap - dml_pliv_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + dml_pliv_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = dml_pliv_obj$boot_coef - + expect_equal(theta, theta_obj, tolerance = 1e-8) expect_equal(se, se_obj, tolerance = 1e-8) - + expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) } ) @@ -94,10 +97,10 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.partialXZ (no cross-fitting)", .cases = test_cases_nocf, { n_folds = 2 - + learner_pars = get_default_mlmethod_pliv(learner) df = data_pliv$df - + # Passing for non-cross-fitting case set.seed(3141) my_task = Task$new("help task", "regr", data_pliv$df) @@ -105,27 +108,27 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par train_ids = list(my_sampling$train_set(1)) test_ids = list(my_sampling$test_set(1)) smpls = list(list(train_ids = train_ids, test_ids = test_ids)) - + pliv_hat = dml_pliv_partial_xz(df, - y = "y", d = "d", z = c("z", "z2"), - n_folds = 1, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), - params_g = learner_pars$params$params_g, - params_m = learner_pars$params$params_m, - params_r = learner_pars$params$params_r, - dml_procedure = dml_procedure, score = score, - smpls=smpls) + y = "y", d = "d", z = c("z", "z2"), + n_folds = 1, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), + params_g = learner_pars$params$params_g, + params_m = learner_pars$params$params_m, + params_r = learner_pars$params$params_r, + dml_procedure = dml_procedure, score = score, + smpls = smpls) theta = pliv_hat$coef se = pliv_hat$se - + set.seed(3141) Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] - + dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) + y_col = "y", + d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) dml_pliv_nocf = DoubleMLPLIV.partialXZ( data = dml_data, n_folds = n_folds, @@ -135,21 +138,24 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par dml_procedure = dml_procedure, score = score, apply_cross_fitting = FALSE) - - dml_pliv_nocf$set_ml_nuisance_params(treat_var = "d", - learner = "ml_g", - params = learner_pars$params$params_g) - dml_pliv_nocf$set_ml_nuisance_params(treat_var = "d", - learner = "ml_m", - params = learner_pars$params$params_m) - dml_pliv_nocf$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = learner_pars$params$params_r) - + 
+ dml_pliv_nocf$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_g", + params = learner_pars$params$params_g) + dml_pliv_nocf$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_m", + params = learner_pars$params$params_m) + dml_pliv_nocf$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = learner_pars$params$params_r) + dml_pliv_nocf$fit() theta_obj = dml_pliv_nocf$coef se_obj = dml_pliv_nocf$se - + expect_equal(theta, theta_obj, tolerance = 1e-8) expect_equal(se, se_obj, tolerance = 1e-8) } @@ -159,68 +165,74 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par .cases = test_cases, { n_folds = 2 n_rep = 3 - + learner_pars = get_default_mlmethod_pliv(learner) - + df = data_pliv$df Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) + y_col = "y", + d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) set.seed(3141) dml_pliv_obj = DoubleMLPLIV.partialXZ(dml_data, - n_folds = n_folds, n_rep = n_rep, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), - dml_procedure = dml_procedure, - score = score) - - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_g", - params = learner_pars$params$params_g) - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_m", - params = learner_pars$params$params_m) - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = learner_pars$params$params_r) - + n_folds = n_folds, n_rep = n_rep, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), + dml_procedure = dml_procedure, + score = score) + + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_g", + params = learner_pars$params$params_g) + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_m", + params = learner_pars$params$params_m) + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = learner_pars$params$params_r) + dml_pliv_obj$fit() theta = dml_pliv_obj$coef se = dml_pliv_obj$se - + params_g_fold_wise = rep(list(rep(list(learner_pars$params$params_g), n_folds)), n_rep) params_m_fold_wise = rep(list(rep(list(learner_pars$params$params_m), n_folds)), n_rep) params_r_fold_wise = rep(list(rep(list(learner_pars$params$params_r), n_folds)), n_rep) - + set.seed(3141) dml_pliv_obj_fold_wise = DoubleMLPLIV.partialXZ(dml_data, - n_folds = n_folds, n_rep = n_rep, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), - dml_procedure = dml_procedure, - score = score) - - dml_pliv_obj_fold_wise$set_ml_nuisance_params(treat_var = "d", - learner = "ml_g", - params = params_g_fold_wise, - set_fold_specific = TRUE) - dml_pliv_obj_fold_wise$set_ml_nuisance_params(treat_var = "d", - learner = "ml_m", - params = params_m_fold_wise, - set_fold_specific = TRUE) - dml_pliv_obj_fold_wise$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = params_r_fold_wise, - set_fold_specific = TRUE) - + n_folds = n_folds, n_rep = n_rep, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), + ml_r = 
mlr3::lrn(learner_pars$mlmethod$mlmethod_r), + dml_procedure = dml_procedure, + score = score) + + dml_pliv_obj_fold_wise$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_g", + params = params_g_fold_wise, + set_fold_specific = TRUE) + dml_pliv_obj_fold_wise$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_m", + params = params_m_fold_wise, + set_fold_specific = TRUE) + dml_pliv_obj_fold_wise$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = params_r_fold_wise, + set_fold_specific = TRUE) + dml_pliv_obj_fold_wise$fit() theta_fold_wise = dml_pliv_obj_fold_wise$coef se_fold_wise = dml_pliv_obj_fold_wise$se - + expect_equal(theta, theta_fold_wise, tolerance = 1e-8) expect_equal(se, se_fold_wise, tolerance = 1e-8) } @@ -230,53 +242,56 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par .cases = test_cases, { n_folds = 2 n_rep = 3 - + params_g = list(cp = 0.01, minsplit = 20) # this are defaults params_m = list(cp = 0.01, minsplit = 20) # this are defaults params_r = list(cp = 0.01, minsplit = 20) # this are defaults - + df = data_pliv$df Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) - + y_col = "y", + d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) + set.seed(3141) dml_pliv_default = DoubleMLPLIV.partialXZ(dml_data, - n_folds = n_folds, n_rep = n_rep, - ml_g = lrn('regr.rpart'), - ml_m = lrn('regr.rpart'), - ml_r = lrn('regr.rpart'), - dml_procedure = dml_procedure, - score = score) - + n_folds = n_folds, n_rep = n_rep, + ml_g = lrn("regr.rpart"), + ml_m = lrn("regr.rpart"), + ml_r = lrn("regr.rpart"), + dml_procedure = dml_procedure, + score = score) + dml_pliv_default$fit() theta_default = dml_pliv_default$coef se_default = dml_pliv_default$se - + set.seed(3141) dml_pliv_obj = DoubleMLPLIV.partialXZ(dml_data, - n_folds = n_folds, n_rep = n_rep, - ml_g = lrn('regr.rpart'), - ml_m = lrn('regr.rpart'), - ml_r = lrn('regr.rpart'), - dml_procedure = dml_procedure, - score = score) - - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_g", - params = params_g) - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_m", - params = params_m) - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = params_r) - + n_folds = n_folds, n_rep = n_rep, + ml_g = lrn("regr.rpart"), + ml_m = lrn("regr.rpart"), + ml_r = lrn("regr.rpart"), + dml_procedure = dml_procedure, + score = score) + + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_g", + params = params_g) + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_m", + params = params_m) + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = params_r) + dml_pliv_obj$fit() theta = dml_pliv_obj$coef se = dml_pliv_obj$se - + expect_equal(theta, theta_default, tolerance = 1e-8) expect_equal(se, se_default, tolerance = 1e-8) } diff --git a/tests/testthat/test-double_ml_pliv_partial_z.R b/tests/testthat/test-double_ml_pliv_partial_z.R index 9381efa2..d1876a92 100644 --- a/tests/testthat/test-double_ml_pliv_partial_z.R +++ b/tests/testthat/test-double_ml_pliv_partial_z.R @@ -27,25 +27,25 @@ patrick::with_parameters_test_that("Unit tests for PLIV.partialZ:", se = pliv_hat$se boot_theta = bootstrap_pliv_partial_z(pliv_hat$thetas, pliv_hat$ses, - data_pliv_partialZ$df, - y = "y", d = "d", z = paste0("Z", 1:dim_z), 
- n_folds = 5, smpls = pliv_hat$smpls, - all_preds= pliv_hat$all_preds, - bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef - + data_pliv_partialZ$df, + y = "y", d = "d", z = paste0("Z", 1:dim_z), + n_folds = 5, smpls = pliv_hat$smpls, + all_preds = pliv_hat$all_preds, + bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef + set.seed(3141) double_mlpliv_obj = DoubleMLPLIV.partialZ(data_pliv_partialZ$dml_data, - ml_r = learner_pars$ml_r$clone(), - n_folds = 5, - score = score, - dml_procedure = dml_procedure) + ml_r = learner_pars$ml_r$clone(), + n_folds = 5, + score = score, + dml_procedure = dml_procedure) double_mlpliv_obj$fit() theta_obj = double_mlpliv_obj$coef se_obj = double_mlpliv_obj$se # bootstrap - double_mlpliv_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlpliv_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = double_mlpliv_obj$boot_coef # at the moment the object result comes without a name @@ -56,13 +56,16 @@ patrick::with_parameters_test_that("Unit tests for PLIV.partialZ:", ) test_that("Unit tests for PLIV.partialZ invalid score", { - msg = paste("Callable score not implemented for DoubleMLPLIV with", - "partialX=FALSE and partialZ=TRUE.") + msg = paste( + "Callable score not implemented for DoubleMLPLIV with", + "partialX=FALSE and partialZ=TRUE.") double_mlplr_obj <- DoubleMLPLIV.partialZ( data_pliv_partialZ$dml_data, - ml_r = mlr3::lrn('regr.rpart'), - score = function(x) return(mean(x))) + ml_r = mlr3::lrn("regr.rpart"), + score = function(x) { + return(mean(x)) + }) expect_error(double_mlplr_obj$fit(), - regexp = msg) + regexp = msg) } ) diff --git a/tests/testthat/test-double_ml_pliv_partial_z_parameter_passing.R b/tests/testthat/test-double_ml_pliv_partial_z_parameter_passing.R index e2d155a7..7c2941cb 100644 --- a/tests/testthat/test-double_ml_pliv_partial_z_parameter_passing.R +++ b/tests/testthat/test-double_ml_pliv_partial_z_parameter_passing.R @@ -26,7 +26,7 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par n_rep_boot = 498 n_folds = 2 n_rep = 3 - + learner_pars = get_default_mlmethod_pliv(learner) df = data_pliv$df @@ -37,11 +37,11 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par set.seed(3141) pliv_hat = dml_pliv_partial_z(df, - y = "y", d = "d", z = c("z", "z2"), - n_folds = n_folds, n_rep = n_rep, - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), - params_r = learner_pars$params$params_r, - dml_procedure = dml_procedure, score = score) + y = "y", d = "d", z = c("z", "z2"), + n_folds = n_folds, n_rep = n_rep, + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), + params_r = learner_pars$params$params_r, + dml_procedure = dml_procedure, score = score) theta = pliv_hat$coef se = pliv_hat$se @@ -62,10 +62,10 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par set.seed(3141) Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] - + dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) + y_col = "y", + d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) dml_pliv_obj = DoubleMLPLIV.partialZ( data = dml_data, n_folds = n_folds, n_rep = n_rep, @@ -73,23 +73,24 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par dml_procedure = dml_procedure, score = score) - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = learner_pars$params$params_r) - + 
dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = learner_pars$params$params_r) + dml_pliv_obj$fit() - + theta_obj = dml_pliv_obj$coef se_obj = dml_pliv_obj$se - + # bootstrap set.seed(3141) dml_pliv_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) boot_theta_obj = dml_pliv_obj$boot_coef - + expect_equal(theta, theta_obj, tolerance = 1e-8) expect_equal(se, se_obj, tolerance = 1e-8) - + expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) } ) @@ -97,7 +98,7 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.partialZ (no cross-fitting)", .cases = test_cases_nocf, { n_folds = 2 - + learner_pars = get_default_mlmethod_pliv(learner) df = data_pliv$df @@ -113,23 +114,23 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par train_ids = list(my_sampling$train_set(1)) test_ids = list(my_sampling$test_set(1)) smpls = list(list(train_ids = train_ids, test_ids = test_ids)) - + pliv_hat = dml_pliv_partial_z(df, - y = "y", d = "d", z = c("z", "z2"), - n_folds = 1, - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), - params_r = learner_pars$params$params_r, - dml_procedure = dml_procedure, score = score, - smpls=smpls) + y = "y", d = "d", z = c("z", "z2"), + n_folds = 1, + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), + params_r = learner_pars$params$params_r, + dml_procedure = dml_procedure, score = score, + smpls = smpls) theta = pliv_hat$coef se = pliv_hat$se - + set.seed(3141) Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] - + dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) + y_col = "y", + d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) dml_pliv_nocf = DoubleMLPLIV.partialZ( data = dml_data, n_folds = n_folds, @@ -138,14 +139,15 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par score = score, apply_cross_fitting = FALSE) - dml_pliv_nocf$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = learner_pars$params$params_r) - + dml_pliv_nocf$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = learner_pars$params$params_r) + dml_pliv_nocf$fit() theta_obj = dml_pliv_nocf$coef se_obj = dml_pliv_nocf$se - + expect_equal(theta, theta_obj, tolerance = 1e-8) expect_equal(se, se_obj, tolerance = 1e-8) } @@ -156,48 +158,50 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par .cases = test_cases, { n_folds = 2 n_rep = 3 - + learner_pars = get_default_mlmethod_pliv(learner) - + df = data_pliv$df Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) - + y_col = "y", + d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) + set.seed(3141) dml_pliv_obj = DoubleMLPLIV.partialZ(dml_data, - n_folds = n_folds, n_rep = n_rep, - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), - dml_procedure = dml_procedure, - score = score) - - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = learner_pars$params$params_r) - + n_folds = n_folds, n_rep = n_rep, + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), + dml_procedure = dml_procedure, + score = score) + + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = 
learner_pars$params$params_r) + dml_pliv_obj$fit() theta = dml_pliv_obj$coef se = dml_pliv_obj$se params_r_fold_wise = rep(list(rep(list(learner_pars$params$params_r), n_folds)), n_rep) - + set.seed(3141) dml_pliv_obj_fold_wise = DoubleMLPLIV.partialZ(dml_data, - n_folds = n_folds, n_rep = n_rep, - ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), - dml_procedure = dml_procedure, - score = score) - - dml_pliv_obj_fold_wise$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = params_r_fold_wise, - set_fold_specific = TRUE) - + n_folds = n_folds, n_rep = n_rep, + ml_r = mlr3::lrn(learner_pars$mlmethod$mlmethod_r), + dml_procedure = dml_procedure, + score = score) + + dml_pliv_obj_fold_wise$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = params_r_fold_wise, + set_fold_specific = TRUE) + dml_pliv_obj_fold_wise$fit() theta_fold_wise = dml_pliv_obj_fold_wise$coef se_fold_wise = dml_pliv_obj_fold_wise$se - + expect_equal(theta, theta_fold_wise, tolerance = 1e-8) expect_equal(se, se_fold_wise, tolerance = 1e-8) } @@ -209,38 +213,39 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par n_rep = 3 params_r = list(cp = 0.01, minsplit = 20) # this are defaults - + df = data_pliv$df Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) - + y_col = "y", + d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) + set.seed(3141) dml_pliv_default = DoubleMLPLIV.partialZ(dml_data, - n_folds = n_folds, n_rep = n_rep, - ml_r = lrn('regr.rpart'), - dml_procedure = dml_procedure, - score = score) - + n_folds = n_folds, n_rep = n_rep, + ml_r = lrn("regr.rpart"), + dml_procedure = dml_procedure, + score = score) + dml_pliv_default$fit() theta_default = dml_pliv_default$coef se_default = dml_pliv_default$se - + set.seed(3141) dml_pliv_obj = DoubleMLPLIV.partialZ(dml_data, - n_folds = n_folds, n_rep = n_rep, - ml_r = lrn('regr.rpart'), - dml_procedure = dml_procedure, - score = score) - dml_pliv_obj$set_ml_nuisance_params(treat_var = "d", - learner = "ml_r", - params = params_r) - + n_folds = n_folds, n_rep = n_rep, + ml_r = lrn("regr.rpart"), + dml_procedure = dml_procedure, + score = score) + dml_pliv_obj$set_ml_nuisance_params( + treat_var = "d", + learner = "ml_r", + params = params_r) + dml_pliv_obj$fit() theta = dml_pliv_obj$coef se = dml_pliv_obj$se - + expect_equal(theta, theta_default, tolerance = 1e-8) expect_equal(se, se_default, tolerance = 1e-8) } diff --git a/tests/testthat/test-double_ml_pliv_user_score.R b/tests/testthat/test-double_ml_pliv_user_score.R index 8bedffc6..3b926684 100644 --- a/tests/testthat/test-double_ml_pliv_user_score.R +++ b/tests/testthat/test-double_ml_pliv_user_score.R @@ -47,12 +47,12 @@ patrick::with_parameters_test_that("Unit tests for PLIV, callable score:", theta_obj = double_mlpliv_obj$coef se_obj = double_mlpliv_obj$se - double_mlpliv_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlpliv_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = double_mlpliv_obj$boot_coef set.seed(3141) double_mlpliv_obj_score = DoubleMLPLIV$new( - data = data_pliv$dml_data, + data = data_pliv$dml_data, n_folds = 5, ml_g = lrn(learner), ml_m = lrn(learner), @@ -64,7 +64,7 @@ patrick::with_parameters_test_that("Unit tests for PLIV, callable score:", theta_obj_score = double_mlpliv_obj_score$coef se_obj_score = double_mlpliv_obj_score$se - 
double_mlpliv_obj_score$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlpliv_obj_score$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_score = double_mlpliv_obj_score$boot_coef expect_equal(theta_obj, theta_obj_score, tolerance = 1e-8) diff --git a/tests/testthat/test-double_ml_plr.R b/tests/testthat/test-double_ml_plr.R index d36aec27..5a43dd94 100644 --- a/tests/testthat/test-double_ml_plr.R +++ b/tests/testthat/test-double_ml_plr.R @@ -36,15 +36,15 @@ patrick::with_parameters_test_that("Unit tests for PLR:", se = plr_hat$se t = plr_hat$t pval = plr_hat$pval - #ci = confint(plr_hat, level = 0.95, joint = FALSE) - + # ci = confint(plr_hat, level = 0.95, joint = FALSE) + boot_theta = bootstrap_plr(plr_hat$thetas, plr_hat$ses, - data_plr$df, - y = "y", d = "d", - n_folds = n_folds, smpls = plr_hat$smpls, - all_preds= plr_hat$all_preds, - bootstrap = "normal", n_rep_boot = n_rep_boot, - score = score)$boot_coef + data_plr$df, + y = "y", d = "d", + n_folds = n_folds, smpls = plr_hat$smpls, + all_preds = plr_hat$all_preds, + bootstrap = "normal", n_rep_boot = n_rep_boot, + score = score)$boot_coef set.seed(3141) double_mlplr_obj = DoubleMLPLR$new( @@ -60,17 +60,17 @@ patrick::with_parameters_test_that("Unit tests for PLR:", se_obj = double_mlplr_obj$se t_obj = double_mlplr_obj$t_stat pval_obj = double_mlplr_obj$pval - #ci_obj = double_mlplr_obj$confint(level = 0.95, joint = FALSE) - + # ci_obj = double_mlplr_obj$confint(level = 0.95, joint = FALSE) + # bootstrap - double_mlplr_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlplr_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = double_mlplr_obj$boot_coef expect_equal(theta, theta_obj, tolerance = 1e-8) expect_equal(se, se_obj, tolerance = 1e-8) expect_equal(t, t_obj, tolerance = 1e-8) expect_equal(pval, pval_obj, tolerance = 1e-8) - #expect_equal(ci, ci_obj, tolerance = 1e-8) + # expect_equal(ci, ci_obj, tolerance = 1e-8) expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) } diff --git a/tests/testthat/test-double_ml_plr_classifier.R b/tests/testthat/test-double_ml_plr_classifier.R index 69b3cf4b..b51fb146 100644 --- a/tests/testthat/test-double_ml_plr_classifier.R +++ b/tests/testthat/test-double_ml_plr_classifier.R @@ -30,27 +30,27 @@ patrick::with_parameters_test_that("Unit tests for PLR with classifier for ml_m: if (g_learner == "regr.cv_glmnet") { ml_g = mlr3::lrn(g_learner) ml_m = mlr3::lrn(m_learner) - + set.seed(3141) plr_hat = dml_plr(data_irm$df, - y = "y", d = "d", - n_folds = n_folds, - ml_g = ml_g$clone(), ml_m = ml_m$clone(), - dml_procedure = dml_procedure, score = score) + y = "y", d = "d", + n_folds = n_folds, + ml_g = ml_g$clone(), ml_m = ml_m$clone(), + dml_procedure = dml_procedure, score = score) theta = plr_hat$coef se = plr_hat$se - + boot_theta = bootstrap_plr(plr_hat$thetas, plr_hat$ses, - data_irm$df, - y = "y", d = "d", - n_folds = n_folds, smpls = plr_hat$smpls, - all_preds= plr_hat$all_preds, - bootstrap = "normal", n_rep_boot = n_rep_boot, - score = score)$boot_coef - + data_irm$df, + y = "y", d = "d", + n_folds = n_folds, smpls = plr_hat$smpls, + all_preds = plr_hat$all_preds, + bootstrap = "normal", n_rep_boot = n_rep_boot, + score = score)$boot_coef + t = plr_hat$t pval = plr_hat$pval - + set.seed(3141) double_mlplr_obj = DoubleMLPLR$new( data = data_irm$dml_data, @@ -64,17 +64,17 @@ patrick::with_parameters_test_that("Unit tests for PLR with classifier for ml_m: se_obj = double_mlplr_obj$se t_obj = double_mlplr_obj$t_stat 
pval_obj = double_mlplr_obj$pval - #ci_obj = double_mlplr_obj$confint(level = 0.95, joint = FALSE) - + # ci_obj = double_mlplr_obj$confint(level = 0.95, joint = FALSE) + # bootstrap - double_mlplr_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlplr_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = double_mlplr_obj$boot_coef - + expect_equal(theta, theta_obj, tolerance = 1e-8) expect_equal(se, se_obj, tolerance = 1e-8) expect_equal(t, t_obj, tolerance = 1e-8) expect_equal(pval, pval_obj, tolerance = 1e-8) - #expect_equal(ci, ci_obj, tolerance = 1e-8) + # expect_equal(ci, ci_obj, tolerance = 1e-8) } else if (g_learner == "classif.cv_glmnet") { msg = "Invalid learner provided for ml_g: must be of class 'LearnerRegr'" @@ -85,37 +85,40 @@ patrick::with_parameters_test_that("Unit tests for PLR with classifier for ml_m: dml_procedure = dml_procedure, n_folds = n_folds, score = score), - regexp = msg) + regexp = msg) } } ) test_that("Unit tests for exception handling of PLR with classifier for ml_m:", { # Only binary outcome with values 0 and 1 is allowed when ml_m is a classifier - + # Test with 0 and 2 df = data_irm$df - df['d'] = df['d']*2 - dml_data = double_ml_data_from_data_frame(df, y_col = 'y', d_cols = 'd') - double_mlplr_obj = DoubleMLPLR$new(data = dml_data, - ml_g = mlr3::lrn('regr.rpart'), - ml_m = mlr3::lrn('classif.rpart')) - msg = paste("Assertion on 'levels\\(data\\[\\[target\\]\\])' failed:", - "Must be equal to set \\{'0','1'\\}, but is \\{'0','2'\\}.") + df["d"] = df["d"] * 2 + dml_data = double_ml_data_from_data_frame(df, y_col = "y", d_cols = "d") + double_mlplr_obj = DoubleMLPLR$new( + data = dml_data, + ml_g = mlr3::lrn("regr.rpart"), + ml_m = mlr3::lrn("classif.rpart")) + msg = paste( + "Assertion on 'levels\\(data\\[\\[target\\]\\])' failed:", + "Must be equal to set \\{'0','1'\\}, but is \\{'0','2'\\}.") expect_error(double_mlplr_obj$fit(), - regexp = msg) - + regexp = msg) + # Test with 0.5 and 1 df = data_irm$df - df['d'] = (df['d']+2)/2 - dml_data = double_ml_data_from_data_frame(df, y_col = 'y', d_cols = 'd') - double_mlplr_obj = DoubleMLPLR$new(data = dml_data, - ml_g = mlr3::lrn('regr.rpart'), - ml_m = mlr3::lrn('classif.rpart')) - msg = paste("Assertion on 'levels\\(data\\[\\[target\\]\\])' failed:", - "Must be equal to set \\{'0','1'\\}, but is \\{'1','1.5'\\}.") + df["d"] = (df["d"] + 2) / 2 + dml_data = double_ml_data_from_data_frame(df, y_col = "y", d_cols = "d") + double_mlplr_obj = DoubleMLPLR$new( + data = dml_data, + ml_g = mlr3::lrn("regr.rpart"), + ml_m = mlr3::lrn("classif.rpart")) + msg = paste( + "Assertion on 'levels\\(data\\[\\[target\\]\\])' failed:", + "Must be equal to set \\{'0','1'\\}, but is \\{'1','1.5'\\}.") expect_error(double_mlplr_obj$fit(), - regexp = msg) - - } + regexp = msg) +} ) diff --git a/tests/testthat/test-double_ml_plr_exception_handling.R b/tests/testthat/test-double_ml_plr_exception_handling.R index 66904653..73749101 100644 --- a/tests/testthat/test-double_ml_plr_exception_handling.R +++ b/tests/testthat/test-double_ml_plr_exception_handling.R @@ -57,7 +57,7 @@ patrick::with_parameters_test_that("Unit tests for exception handling of PLR:", n_rep = n_rep, score = score, apply_cross_fitting = apply_cross_fitting), - regexp = msg) + regexp = msg) } else { double_mlplr_obj = DoubleMLPLR$new( data = data_ml, @@ -87,7 +87,7 @@ patrick::with_parameters_test_that("Unit tests for exception handling of PLR:", utils::capture.output(double_mlplr_obj$summary(), file = NULL) msg = "Apply fit\\(\\) 
before bootstrap\\(\\)." expect_error(double_mlplr_obj$bootstrap(method = "normal", n_rep_boot = n_rep_boot), - regexp = msg) + regexp = msg) double_mlplr_obj$fit() @@ -96,10 +96,10 @@ patrick::with_parameters_test_that("Unit tests for exception handling of PLR:", msg = "'level' must be > 0 and < 1." expect_error(double_mlplr_obj$confint(level = 1.2), - regexp = msg) + regexp = msg) msg = "Multiplier bootstrap has not yet been performed. First call bootstrap\\(\\) and then try confint\\(\\) again." expect_error(double_mlplr_obj$confint(joint = TRUE, level = 0.95), - regexp = msg) + regexp = msg) } } ) diff --git a/tests/testthat/test-double_ml_plr_loaded_mlr3learner.R b/tests/testthat/test-double_ml_plr_loaded_mlr3learner.R index 6e6125dc..c8deff11 100644 --- a/tests/testthat/test-double_ml_plr_loaded_mlr3learner.R +++ b/tests/testthat/test-double_ml_plr_loaded_mlr3learner.R @@ -55,7 +55,7 @@ patrick::with_parameters_test_that("Unit tests for PLR:", t = double_mlplr$t_stat pval = double_mlplr$pval ci = double_mlplr$confint(level = 0.95, joint = FALSE) - double_mlplr$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlplr$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta = double_mlplr$boot_coef set.seed(123) @@ -74,7 +74,7 @@ patrick::with_parameters_test_that("Unit tests for PLR:", t_loaded = double_mlplr_loaded$t_stat pval_loaded = double_mlplr_loaded$pval ci_loaded = double_mlplr_loaded$confint(level = 0.95, joint = FALSE) - double_mlplr$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlplr$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_loaded = double_mlplr$boot_coef set.seed(123) @@ -104,7 +104,7 @@ patrick::with_parameters_test_that("Unit tests for PLR:", t_semiloaded = double_mlplr_semiloaded$t_stat pval_semiloaded = double_mlplr_semiloaded$pval ci_semiloaded = double_mlplr_semiloaded$confint(level = 0.95, joint = FALSE) - double_mlplr$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlplr$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_semiloaded = double_mlplr$boot_coef expect_equal(theta, theta_loaded, tolerance = 1e-8) @@ -113,7 +113,7 @@ patrick::with_parameters_test_that("Unit tests for PLR:", expect_equal(pval, pval_loaded, tolerance = 1e-8) expect_equal(ci, ci_loaded, tolerance = 1e-8) expect_equal(as.vector(boot_theta), as.vector(boot_theta_loaded), tolerance = 1e-8) - + expect_equal(theta_semiloaded, theta_loaded, tolerance = 1e-8) expect_equal(se_semiloaded, se_loaded, tolerance = 1e-8) expect_equal(t_semiloaded, t_loaded, tolerance = 1e-8) diff --git a/tests/testthat/test-double_ml_plr_multitreat.R b/tests/testthat/test-double_ml_plr_multitreat.R index f4f2c315..a31808fc 100644 --- a/tests/testthat/test-double_ml_plr_multitreat.R +++ b/tests/testthat/test-double_ml_plr_multitreat.R @@ -30,26 +30,26 @@ patrick::with_parameters_test_that("Unit tests for PLR:", set.seed(3141) plr_hat = dml_plr_multitreat(data_plr_multi, - y = "y", d = c("d1", "d2", "d3"), - n_folds = n_folds, - ml_g = learner_pars$ml_g$clone(), - ml_m = learner_pars$ml_m$clone(), - dml_procedure = dml_procedure, score = score) + y = "y", d = c("d1", "d2", "d3"), + n_folds = n_folds, + ml_g = learner_pars$ml_g$clone(), + ml_m = learner_pars$ml_m$clone(), + dml_procedure = dml_procedure, score = score) theta = plr_hat$coef se = plr_hat$se t = plr_hat$t pval = plr_hat$pval - #ci_ptwise = confint(plr_hat, joint = FALSE, level = 0.95) + # ci_ptwise = confint(plr_hat, joint = FALSE, level = 0.95) set.seed(3141) boot_theta = 
boot_plr_multitreat(plr_hat$thetas, plr_hat$ses, - data_plr_multi, - y = "y", d = c("d1", "d2", "d3"), - n_folds = n_folds, smpls = plr_hat$smpls, - all_preds= plr_hat$all_preds, - bootstrap = "normal", n_rep_boot = n_rep_boot, - score = score)$boot_coef - + data_plr_multi, + y = "y", d = c("d1", "d2", "d3"), + n_folds = n_folds, smpls = plr_hat$smpls, + all_preds = plr_hat$all_preds, + bootstrap = "normal", n_rep_boot = n_rep_boot, + score = score)$boot_coef + set.seed(3141) Xnames = names(data_plr_multi)[names(data_plr_multi) %in% c("y", "d1", "d2", "d3", "z") == FALSE] data_ml = double_ml_data_from_data_frame(data_plr_multi, @@ -85,7 +85,7 @@ patrick::with_parameters_test_that("Unit tests for PLR:", expect_equal(pval, pval_obj, tolerance = 1e-8) expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) - #expect_equal(ci_ptwise, ci_ptwise_obj) - #expect_equal(ci_joint, ci_joint_obj) + # expect_equal(ci_ptwise, ci_ptwise_obj) + # expect_equal(ci_joint, ci_joint_obj) } ) diff --git a/tests/testthat/test-double_ml_plr_nocrossfit.R b/tests/testthat/test-double_ml_plr_nocrossfit.R index 5117c6b8..5e22f3e7 100644 --- a/tests/testthat/test-double_ml_plr_nocrossfit.R +++ b/tests/testthat/test-double_ml_plr_nocrossfit.R @@ -28,7 +28,7 @@ patrick::with_parameters_test_that("Unit tests for PLR:", .cases = test_cases, { learner_pars = get_default_mlmethod_plr(learner) n_rep_boot = 498 - + set.seed(3141) df = data_plr$df if (n_folds == 2) { @@ -36,19 +36,20 @@ patrick::with_parameters_test_that("Unit tests for PLR:", my_sampling = rsmp("holdout", ratio = 0.5)$instantiate(my_task) train_ids = list(my_sampling$train_set(1)) test_ids = list(my_sampling$test_set(1)) - + smpls = list(list(train_ids = train_ids, test_ids = test_ids)) } else { - smpls = list(list(train_ids = list(seq(nrow(df))), - test_ids = list(seq(nrow(df))))) + smpls = list(list( + train_ids = list(seq(nrow(df))), + test_ids = list(seq(nrow(df))))) } plr_hat = dml_plr(df, - y = "y", d = "d", - n_folds = 1, - ml_g = learner_pars$ml_g$clone(), - ml_m = learner_pars$ml_m$clone(), - dml_procedure = dml_procedure, score = score, - smpls=smpls) + y = "y", d = "d", + n_folds = 1, + ml_g = learner_pars$ml_g$clone(), + ml_m = learner_pars$ml_m$clone(), + dml_procedure = dml_procedure, score = score, + smpls = smpls) theta = plr_hat$coef se = plr_hat$se t = plr_hat$t @@ -64,7 +65,7 @@ patrick::with_parameters_test_that("Unit tests for PLR:", score = score, apply_cross_fitting = apply_cross_fitting) - double_mlplr_obj$fit(store_predictions=TRUE) + double_mlplr_obj$fit(store_predictions = TRUE) theta_obj = double_mlplr_obj$coef se_obj = double_mlplr_obj$se t_obj = double_mlplr_obj$t_stat @@ -99,7 +100,7 @@ patrick::with_parameters_test_that("Unit tests for PLR:", t_external = dml_plr_obj_external$t_stat pval_external = dml_plr_obj_external$pval ci_external = dml_plr_obj_external$confint(level = 0.95, joint = FALSE) - + expect_identical(double_mlplr_obj$smpls, dml_plr_obj_external$smpls) expect_equal(theta_external, theta_obj, tolerance = 1e-8) expect_equal(se_external, se_obj, tolerance = 1e-8) diff --git a/tests/testthat/test-double_ml_plr_parameter_passing.R b/tests/testthat/test-double_ml_plr_parameter_passing.R index 2067007a..a9ef63fe 100644 --- a/tests/testthat/test-double_ml_plr_parameter_passing.R +++ b/tests/testthat/test-double_ml_plr_parameter_passing.R @@ -5,20 +5,20 @@ lgr::get_logger("mlr3")$set_threshold("warn") on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") if (on_cran) { test_cases = expand.grid( - 
learner = 'regr.rpart', + learner = "regr.rpart", dml_procedure = "dml2", score = "partialling out", stringsAsFactors = FALSE) } else { test_cases = expand.grid( - learner = 'regr.rpart', + learner = "regr.rpart", dml_procedure = c("dml1", "dml2"), score = c("IV-type", "partialling out"), stringsAsFactors = FALSE) } test_cases_nocf = expand.grid( - learner = 'regr.rpart', + learner = "regr.rpart", dml_procedure = "dml1", score = "partialling out", stringsAsFactors = FALSE) @@ -40,24 +40,24 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLR (oop set.seed(3141) plr_hat = dml_plr_multitreat(data_plr_multi, - y = "y", d = c("d1", "d2"), - n_folds = n_folds, n_rep = n_rep, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), - params_g = params_g, - params_m = params_m, - dml_procedure = dml_procedure, score = score) + y = "y", d = c("d1", "d2"), + n_folds = n_folds, n_rep = n_rep, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), + params_g = params_g, + params_m = params_m, + dml_procedure = dml_procedure, score = score) theta = plr_hat$coef se = plr_hat$se - + boot_theta = boot_plr_multitreat(plr_hat$thetas, plr_hat$ses, - data_plr_multi, - y = "y", d = c("d1", "d2"), - n_folds = n_folds, n_rep = n_rep, - smpls = plr_hat$smpls, - all_preds= plr_hat$all_preds, - bootstrap = "normal", n_rep_boot = n_rep_boot, - score = score)$boot_coef + data_plr_multi, + y = "y", d = c("d1", "d2"), + n_folds = n_folds, n_rep = n_rep, + smpls = plr_hat$smpls, + all_preds = plr_hat$all_preds, + bootstrap = "normal", n_rep_boot = n_rep_boot, + score = score)$boot_coef Xnames = names(data_plr_multi)[names(data_plr_multi) %in% c("y", "d1", "d2", "z") == FALSE] data_ml = double_ml_data_from_data_frame(data_plr_multi, @@ -73,31 +73,35 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLR (oop score = score, n_rep = n_rep) - double_mlplr_obj$set_ml_nuisance_params(treat_var = "d1", learner = "ml_g", - params = learner_pars$params$params_g) - double_mlplr_obj$set_ml_nuisance_params(treat_var = "d2", learner = "ml_g", - params = learner_pars$params$params_g) - double_mlplr_obj$set_ml_nuisance_params(treat_var = "d1", learner = "ml_m", - params = learner_pars$params$params_m) - double_mlplr_obj$set_ml_nuisance_params(treat_var = "d2", learner = "ml_m", - params = learner_pars$params$params_m) + double_mlplr_obj$set_ml_nuisance_params( + treat_var = "d1", learner = "ml_g", + params = learner_pars$params$params_g) + double_mlplr_obj$set_ml_nuisance_params( + treat_var = "d2", learner = "ml_g", + params = learner_pars$params$params_g) + double_mlplr_obj$set_ml_nuisance_params( + treat_var = "d1", learner = "ml_m", + params = learner_pars$params$params_m) + double_mlplr_obj$set_ml_nuisance_params( + treat_var = "d2", learner = "ml_m", + params = learner_pars$params$params_m) double_mlplr_obj$fit() theta_obj = double_mlplr_obj$coef se_obj = double_mlplr_obj$se - + # bootstrap - double_mlplr_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlplr_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = double_mlplr_obj$boot_coef - + expect_equal(theta, theta_obj, tolerance = 1e-8) expect_equal(se, se_obj, tolerance = 1e-8) - + expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) } ) - + patrick::with_parameters_test_that("Unit tests for parameter passing of PLR (no cross-fitting)", .cases = test_cases_nocf, { 
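# This case runs once per row of test_cases_nocf defined above, i.e. only
# for dml_procedure = "dml1" with score = "partialling out". For comparison,
# the full non-CRAN grid expands to 2 x 2 = 4 cases:
#   nrow(expand.grid(learner = "regr.rpart",
#     dml_procedure = c("dml1", "dml2"),
#     score = c("IV-type", "partialling out")))  # 4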
n_rep_boot = 498 @@ -116,45 +120,49 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLR (no smpls = list(list(train_ids = train_ids, test_ids = test_ids)) plr_hat = dml_plr_multitreat(data_plr_multi, - y = "y", d = c("d1", "d2"), - n_folds = 1, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), - params_g = params_g, - params_m = params_m, - dml_procedure = dml_procedure, score = score, - smpls=smpls) + y = "y", d = c("d1", "d2"), + n_folds = 1, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), + params_g = params_g, + params_m = params_m, + dml_procedure = dml_procedure, score = score, + smpls = smpls) theta = plr_hat$coef se = plr_hat$se - + Xnames = names(data_plr_multi)[names(data_plr_multi) %in% c("y", "d1", "d2", "z") == FALSE] data_ml = double_ml_data_from_data_frame(data_plr_multi, - y_col = "y", - d_cols = c("d1", "d2"), x_cols = Xnames) - + y_col = "y", + d_cols = c("d1", "d2"), x_cols = Xnames) + set.seed(3141) double_mlplr_obj_nocf = DoubleMLPLR$new(data_ml, - n_folds = n_folds, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), - dml_procedure = dml_procedure, - score = score, - apply_cross_fitting = FALSE) - - double_mlplr_obj_nocf$set_ml_nuisance_params(treat_var = "d1", learner = "ml_g", - params = learner_pars$params$params_g) - double_mlplr_obj_nocf$set_ml_nuisance_params(treat_var = "d2", learner = "ml_g", - params = learner_pars$params$params_g) - double_mlplr_obj_nocf$set_ml_nuisance_params(treat_var = "d1", learner = "ml_m", - params = learner_pars$params$params_m) - double_mlplr_obj_nocf$set_ml_nuisance_params(treat_var = "d2", learner = "ml_m", - params = learner_pars$params$params_m) - - + n_folds = n_folds, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), + dml_procedure = dml_procedure, + score = score, + apply_cross_fitting = FALSE) + + double_mlplr_obj_nocf$set_ml_nuisance_params( + treat_var = "d1", learner = "ml_g", + params = learner_pars$params$params_g) + double_mlplr_obj_nocf$set_ml_nuisance_params( + treat_var = "d2", learner = "ml_g", + params = learner_pars$params$params_g) + double_mlplr_obj_nocf$set_ml_nuisance_params( + treat_var = "d1", learner = "ml_m", + params = learner_pars$params$params_m) + double_mlplr_obj_nocf$set_ml_nuisance_params( + treat_var = "d2", learner = "ml_m", + params = learner_pars$params$params_m) + + double_mlplr_obj_nocf$fit() theta_obj_nocf = double_mlplr_obj_nocf$coef se_obj_nocf = double_mlplr_obj_nocf$se - + expect_equal(theta, theta_obj_nocf, tolerance = 1e-8) expect_equal(se, se_obj_nocf, tolerance = 1e-8) } @@ -165,65 +173,73 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLR (fol n_rep_boot = 498 n_folds = 2 n_rep = 3 - + learner_pars = get_default_mlmethod_plr(learner) - + Xnames = names(data_plr_multi)[names(data_plr_multi) %in% c("y", "d1", "d2", "z") == FALSE] data_ml = double_ml_data_from_data_frame(data_plr_multi, - y_col = "y", - d_cols = c("d1", "d2"), x_cols = Xnames) - + y_col = "y", + d_cols = c("d1", "d2"), x_cols = Xnames) + set.seed(3141) double_mlplr_obj = DoubleMLPLR$new(data_ml, - n_folds = n_folds, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), - dml_procedure = dml_procedure, - score = score, - n_rep = n_rep) - - 
double_mlplr_obj$set_ml_nuisance_params(treat_var = "d1", learner = "ml_g", - params = learner_pars$params$params_g) - double_mlplr_obj$set_ml_nuisance_params(treat_var = "d2", learner = "ml_g", - params = learner_pars$params$params_g) - double_mlplr_obj$set_ml_nuisance_params(treat_var = "d1", learner = "ml_m", - params = learner_pars$params$params_m) - double_mlplr_obj$set_ml_nuisance_params(treat_var = "d2", learner = "ml_m", - params = learner_pars$params$params_m) - + n_folds = n_folds, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), + dml_procedure = dml_procedure, + score = score, + n_rep = n_rep) + + double_mlplr_obj$set_ml_nuisance_params( + treat_var = "d1", learner = "ml_g", + params = learner_pars$params$params_g) + double_mlplr_obj$set_ml_nuisance_params( + treat_var = "d2", learner = "ml_g", + params = learner_pars$params$params_g) + double_mlplr_obj$set_ml_nuisance_params( + treat_var = "d1", learner = "ml_m", + params = learner_pars$params$params_m) + double_mlplr_obj$set_ml_nuisance_params( + treat_var = "d2", learner = "ml_m", + params = learner_pars$params$params_m) + double_mlplr_obj$fit() theta = double_mlplr_obj$coef se = double_mlplr_obj$se - + params_g_fold_wise = rep(list(rep(list(learner_pars$params$params_g), n_folds)), n_rep) params_m_fold_wise = rep(list(rep(list(learner_pars$params$params_m), n_folds)), n_rep) - + set.seed(3141) dml_plr_fold_wise = DoubleMLPLR$new(data_ml, - n_folds = n_folds, - ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), - ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), - dml_procedure = dml_procedure, - score = score, - n_rep = n_rep) - - dml_plr_fold_wise$set_ml_nuisance_params(treat_var = "d1", learner = "ml_g", - params = params_g_fold_wise, - set_fold_specific = TRUE) - dml_plr_fold_wise$set_ml_nuisance_params(treat_var = "d2", learner = "ml_g", - params = params_g_fold_wise, - set_fold_specific = TRUE) - dml_plr_fold_wise$set_ml_nuisance_params(treat_var = "d1", learner = "ml_m", - params = params_m_fold_wise, - set_fold_specific = TRUE) - dml_plr_fold_wise$set_ml_nuisance_params(treat_var = "d2", learner = "ml_m", - params = params_m_fold_wise, - set_fold_specific = TRUE) - + n_folds = n_folds, + ml_g = mlr3::lrn(learner_pars$mlmethod$mlmethod_g), + ml_m = mlr3::lrn(learner_pars$mlmethod$mlmethod_m), + dml_procedure = dml_procedure, + score = score, + n_rep = n_rep) + + dml_plr_fold_wise$set_ml_nuisance_params( + treat_var = "d1", learner = "ml_g", + params = params_g_fold_wise, + set_fold_specific = TRUE) + dml_plr_fold_wise$set_ml_nuisance_params( + treat_var = "d2", learner = "ml_g", + params = params_g_fold_wise, + set_fold_specific = TRUE) + dml_plr_fold_wise$set_ml_nuisance_params( + treat_var = "d1", learner = "ml_m", + params = params_m_fold_wise, + set_fold_specific = TRUE) + dml_plr_fold_wise$set_ml_nuisance_params( + treat_var = "d2", learner = "ml_m", + params = params_m_fold_wise, + set_fold_specific = TRUE) + dml_plr_fold_wise$fit() theta_fold_wise = dml_plr_fold_wise$coef se_fold_wise = dml_plr_fold_wise$se - + expect_equal(theta, theta_fold_wise, tolerance = 1e-8) expect_equal(se, se_fold_wise, tolerance = 1e-8) } @@ -234,49 +250,53 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLR (def n_rep_boot = 498 n_folds = 2 n_rep = 3 - + params_g = list(cp = 0.01, minsplit = 20) # these are the defaults params_m = list(cp = 0.01, minsplit = 20) # these are the defaults - + Xnames = names(data_plr_multi)[names(data_plr_multi) %in% c("y",
"d1", "d2", "z") == FALSE] data_ml = double_ml_data_from_data_frame(data_plr_multi, - y_col = "y", - d_cols = c("d1", "d2"), x_cols = Xnames) - + y_col = "y", + d_cols = c("d1", "d2"), x_cols = Xnames) + set.seed(3141) dml_plr_default = DoubleMLPLR$new(data_ml, - n_folds = n_folds, - ml_g = lrn('regr.rpart'), - ml_m = lrn('regr.rpart'), - dml_procedure = dml_procedure, - score = score, - n_rep = n_rep) - + n_folds = n_folds, + ml_g = lrn("regr.rpart"), + ml_m = lrn("regr.rpart"), + dml_procedure = dml_procedure, + score = score, + n_rep = n_rep) + dml_plr_default$fit() theta_default = dml_plr_default$coef se_default = dml_plr_default$se - + set.seed(3141) double_mlplr_obj = DoubleMLPLR$new(data_ml, - n_folds = n_folds, - ml_g = lrn('regr.rpart'), - ml_m = lrn('regr.rpart'), - dml_procedure = dml_procedure, - score = score, - n_rep = n_rep) - double_mlplr_obj$set_ml_nuisance_params(treat_var = "d1", learner = "ml_g", - params = params_g) - double_mlplr_obj$set_ml_nuisance_params(treat_var = "d2", learner = "ml_g", - params = params_g) - double_mlplr_obj$set_ml_nuisance_params(treat_var = "d1", learner = "ml_m", - params = params_m) - double_mlplr_obj$set_ml_nuisance_params(treat_var = "d2", learner = "ml_m", - params = params_m) - + n_folds = n_folds, + ml_g = lrn("regr.rpart"), + ml_m = lrn("regr.rpart"), + dml_procedure = dml_procedure, + score = score, + n_rep = n_rep) + double_mlplr_obj$set_ml_nuisance_params( + treat_var = "d1", learner = "ml_g", + params = params_g) + double_mlplr_obj$set_ml_nuisance_params( + treat_var = "d2", learner = "ml_g", + params = params_g) + double_mlplr_obj$set_ml_nuisance_params( + treat_var = "d1", learner = "ml_m", + params = params_m) + double_mlplr_obj$set_ml_nuisance_params( + treat_var = "d2", learner = "ml_m", + params = params_m) + double_mlplr_obj$fit() theta = double_mlplr_obj$coef se = double_mlplr_obj$se - + expect_equal(theta, theta_default, tolerance = 1e-8) expect_equal(se, se_default, tolerance = 1e-8) } diff --git a/tests/testthat/test-double_ml_plr_rep_cross_fit.R b/tests/testthat/test-double_ml_plr_rep_cross_fit.R index ec836cb5..e211b126 100644 --- a/tests/testthat/test-double_ml_plr_rep_cross_fit.R +++ b/tests/testthat/test-double_ml_plr_rep_cross_fit.R @@ -31,25 +31,25 @@ patrick::with_parameters_test_that("Unit tests for PLR:", set.seed(3141) n_folds = 5 plr_hat = dml_plr(data_plr$df, - y = "y", d = "d", - n_folds = n_folds, n_rep = n_rep, - ml_g = learner_pars$ml_g$clone(), - ml_m = learner_pars$ml_m$clone(), - dml_procedure = dml_procedure, score = score) + y = "y", d = "d", + n_folds = n_folds, n_rep = n_rep, + ml_g = learner_pars$ml_g$clone(), + ml_m = learner_pars$ml_m$clone(), + dml_procedure = dml_procedure, score = score) theta = plr_hat$coef se = plr_hat$se t = plr_hat$t pval = plr_hat$pval - #ci = confint(plr_hat, level = 0.95, joint = FALSE) - + # ci = confint(plr_hat, level = 0.95, joint = FALSE) + boot_theta = bootstrap_plr(plr_hat$thetas, plr_hat$ses, - data_plr$df, - y = "y", d = "d", - n_folds = n_folds, n_rep = n_rep, - smpls = plr_hat$smpls, - all_preds= plr_hat$all_preds, - bootstrap = "normal", n_rep_boot = n_rep_boot, - score = score)$boot_coef + data_plr$df, + y = "y", d = "d", + n_folds = n_folds, n_rep = n_rep, + smpls = plr_hat$smpls, + all_preds = plr_hat$all_preds, + bootstrap = "normal", n_rep_boot = n_rep_boot, + score = score)$boot_coef set.seed(3141) double_mlplr_obj = DoubleMLPLR$new( @@ -66,18 +66,18 @@ patrick::with_parameters_test_that("Unit tests for PLR:", se_obj = double_mlplr_obj$se t_obj = 
double_mlplr_obj$t_stat pval_obj = double_mlplr_obj$pval - #ci_obj = double_mlplr_obj$confint(level = 0.95, joint = FALSE) + # ci_obj = double_mlplr_obj$confint(level = 0.95, joint = FALSE) # bootstrap double_mlplr_obj$bootstrap(method = "normal", n_rep_boot = n_rep_boot) boot_theta_obj = double_mlplr_obj$boot_coef - + expect_equal(theta, theta_obj, tolerance = 1e-8) expect_equal(se, se_obj, tolerance = 1e-8) expect_equal(t, t_obj, tolerance = 1e-8) expect_equal(pval, pval_obj, tolerance = 1e-8) - #expect_equal(ci, ci_obj, tolerance = 1e-8) - + # expect_equal(ci, ci_obj, tolerance = 1e-8) + expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) } ) diff --git a/tests/testthat/test-double_ml_plr_set_samples.R b/tests/testthat/test-double_ml_plr_set_samples.R index 56aae2c3..da408442 100644 --- a/tests/testthat/test-double_ml_plr_set_samples.R +++ b/tests/testthat/test-double_ml_plr_set_samples.R @@ -47,7 +47,7 @@ patrick::with_parameters_test_that("PLR with external sample provision:", double_mlplr_obj$fit() theta_obj = double_mlplr_obj$coef se_obj = double_mlplr_obj$se - double_mlplr_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlplr_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = double_mlplr_obj$boot_coef # External sample provision @@ -65,7 +65,7 @@ patrick::with_parameters_test_that("PLR with external sample provision:", double_mlplr_obj_external$fit() theta_obj_external = double_mlplr_obj_external$coef se_obj_external = double_mlplr_obj_external$se - double_mlplr_obj_external$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlplr_obj_external$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj_external = double_mlplr_obj_external$boot_coef expect_equal(theta_obj, theta_obj_external, tolerance = 1e-8) diff --git a/tests/testthat/test-double_ml_set_sample_splitting.R b/tests/testthat/test-double_ml_set_sample_splitting.R index f63a2fe0..6c21fe1a 100644 --- a/tests/testthat/test-double_ml_set_sample_splitting.R +++ b/tests/testthat/test-double_ml_set_sample_splitting.R @@ -1,137 +1,161 @@ context("Unit tests for the method set_sample_splitting of class DoubleML") set.seed(3141) -dml_data = make_plr_CCDDHNR2018(n_obs=10) +dml_data = make_plr_CCDDHNR2018(n_obs = 10) ml_g = lrn("regr.ranger") ml_m = ml_g$clone() -dml_plr = DoubleMLPLR$new(dml_data, ml_g, ml_m, n_folds=7, n_rep=8) +dml_plr = DoubleMLPLR$new(dml_data, ml_g, ml_m, n_folds = 7, n_rep = 8) test_that("Unit tests for the method set_sample_splitting of class DoubleML", { - + # simple sample splitting with two folds and without cross-fitting - smpls = list(train_ids = list(c(1, 2, 3, 4, 5)), - test_ids = list(c(6, 7, 8, 9, 10))) + smpls = list( + train_ids = list(c(1, 2, 3, 4, 5)), + test_ids = list(c(6, 7, 8, 9, 10))) dml_plr$set_sample_splitting(smpls) - + expect_equal(dml_plr$n_folds, 2) expect_equal(dml_plr$n_rep, 1) expect_equal(dml_plr$apply_cross_fitting, FALSE) expect_equal(list(smpls), dml_plr$smpls) - + # no cross-fitting, no sample-splitting - smpls = list(train_ids = list(seq(10)), - test_ids = list(seq(10))) + smpls = list( + train_ids = list(seq(10)), + test_ids = list(seq(10))) dml_plr$set_sample_splitting(smpls) - + expect_equal(dml_plr$n_folds, 1) expect_equal(dml_plr$n_rep, 1) expect_equal(dml_plr$apply_cross_fitting, FALSE) expect_equal(list(smpls), dml_plr$smpls) - - smpls = list(train_ids = list(c(1, 2, 3, 4, 5)), - test_ids = list(c(6, 7)), - ids = list(c(8, 9, 10))) - msg = paste("Assertion on 'names\\(smpl\\)' failed: Must 
be equal to set", - "\\{'train_ids','test_ids'\\}, but is", - "\\{'train_ids','test_ids','ids'\\}.") + + smpls = list( + train_ids = list(c(1, 2, 3, 4, 5)), + test_ids = list(c(6, 7)), + ids = list(c(8, 9, 10))) + msg = paste( + "Assertion on 'names\\(smpl\\)' failed: Must be equal to set", + "\\{'train_ids','test_ids'\\}, but is", + "\\{'train_ids','test_ids','ids'\\}.") expect_error(dml_plr$set_sample_splitting(smpls), - regexp = msg) + regexp = msg) # sample splitting with two folds and cross-fitting but no repeated cross-fitting - smpls = list(train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), - test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5))) + smpls = list( + train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), + test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5))) dml_plr$set_sample_splitting(smpls) - + expect_equal(dml_plr$n_folds, 2) expect_equal(dml_plr$n_rep, 1) expect_equal(dml_plr$apply_cross_fitting, TRUE) expect_equal(list(smpls), dml_plr$smpls) - + # sample splitting with two folds and cross-fitting but no repeated cross-fitting - smpls = list(list(train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), - test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5)))) + smpls = list(list( + train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), + test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5)))) dml_plr$set_sample_splitting(smpls) - + expect_equal(dml_plr$n_folds, 2) expect_equal(dml_plr$n_rep, 1) expect_equal(dml_plr$apply_cross_fitting, TRUE) expect_equal(smpls, dml_plr$smpls) - - smpls = list(train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), - test_ids = list(c(6, 7, 8, 9, 10), c(1, 2), c(3, 4, 5))) + + smpls = list( + train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), + test_ids = list(c(6, 7, 8, 9, 10), c(1, 2), c(3, 4, 5))) msg = "Number of folds for train and test samples do not match." expect_error(dml_plr$set_sample_splitting(smpls), - regexp = msg) - + regexp = msg) + # simple sample splitting with two folds and without cross-fitting - smpls = list(list(train_ids = list(c(1, 2, 3, 4, 5)), - test_ids = list(c(6, 7, 8, 9, 10)))) + smpls = list(list( + train_ids = list(c(1, 2, 3, 4, 5)), + test_ids = list(c(6, 7, 8, 9, 10)))) dml_plr$set_sample_splitting(smpls) - + expect_equal(dml_plr$n_folds, 2) expect_equal(dml_plr$n_rep, 1) expect_equal(dml_plr$apply_cross_fitting, FALSE) expect_equal(smpls, dml_plr$smpls) - + # sample splitting with cross-fitting and two folds that do not form a partition - smpls = list(train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9)), - test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5, 10))) - msg = paste("Invalid partition provided. Tuples \\(train_ids, test_ids\\)", - "for more than one fold provided that don't form a partition.") + smpls = list( + train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9)), + test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5, 10))) + msg = paste( + "Invalid partition provided. Tuples \\(train_ids, test_ids\\)", + "for more than one fold provided that don't form a partition.") expect_error(dml_plr$set_sample_splitting(smpls), - regexp = msg) - + regexp = msg) + # sample splitting with cross-fitting and two folds that do not form a partition - smpls = list(list(train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9)), - test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5, 10)))) - msg = paste("Invalid partition provided. 
Tuples \\(train_ids, test_ids\\)", - "for more than one fold provided that don't form a partition.") + smpls = list(list( + train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9)), + test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5, 10)))) + msg = paste( + "Invalid partition provided. Tuples \\(train_ids, test_ids\\)", + "for more than one fold provided that don't form a partition.") expect_error(dml_plr$set_sample_splitting(smpls), - regexp = msg) - + regexp = msg) + # sample splitting with two folds and repeated cross-fitting with n_rep = 2 - smpls = list(list(train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), - test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5))), - list(train_ids = list(c(1, 3, 5, 7, 9), c(2, 4, 6, 8, 10)), - test_ids = list(c(2, 4, 6, 8, 10), c(1, 3, 5, 7, 9)))) + smpls = list( + list( + train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), + test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5))), + list( + train_ids = list(c(1, 3, 5, 7, 9), c(2, 4, 6, 8, 10)), + test_ids = list(c(2, 4, 6, 8, 10), c(1, 3, 5, 7, 9)))) dml_plr$set_sample_splitting(smpls) - + expect_equal(dml_plr$n_folds, 2) expect_equal(dml_plr$n_rep, 2) expect_equal(dml_plr$apply_cross_fitting, TRUE) expect_equal(smpls, dml_plr$smpls) - - smpls = list(list(train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), - test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5))), - list(train_ids = list(c(1, 3, 5, 7, 9), - c(2, 4, 6, 7, 8, 9, 10), - c(1, 2, 3, 5, 4, 6, 8, 10)), - test_ids = list(c(2, 4, 6, 8, 10), - c(1, 3, 5), - c(7, 9)))) + + smpls = list( + list( + train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), + test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5))), + list( + train_ids = list( + c(1, 3, 5, 7, 9), + c(2, 4, 6, 7, 8, 9, 10), + c(1, 2, 3, 5, 4, 6, 8, 10)), + test_ids = list( + c(2, 4, 6, 8, 10), + c(1, 3, 5), + c(7, 9)))) msg = "Different number of folds for repeated cross-fitting." expect_error(dml_plr$set_sample_splitting(smpls), - regexp = msg) - + regexp = msg) + # repeated no-cross-fitting - smpls = list(list(train_ids = list(c(1, 2, 3, 4, 5)), - test_ids = list(c(6, 7, 8, 9, 10))), - list(train_ids = list(c(2, 4, 6, 8, 10)), - test_ids = list(c(1, 3, 5, 7, 9)))) + smpls = list( + list( + train_ids = list(c(1, 2, 3, 4, 5)), + test_ids = list(c(6, 7, 8, 9, 10))), + list( + train_ids = list(c(2, 4, 6, 8, 10)), + test_ids = list(c(1, 3, 5, 7, 9)))) msg = "Repeated sample splitting without cross-fitting not implemented." 
expect_error(dml_plr$set_sample_splitting(smpls), - regexp = msg) - + regexp = msg) + # no-cross-fitting - smpls = list(list(train_ids = list(c(1, 2, 3, 4, 5)), - test_ids = list(c(6, 7, 8, 9, 10)))) + smpls = list(list( + train_ids = list(c(1, 2, 3, 4, 5)), + test_ids = list(c(6, 7, 8, 9, 10)))) dml_plr$set_sample_splitting(smpls) expect_equal(dml_plr$n_folds, 2) expect_equal(dml_plr$n_rep, 1) expect_equal(dml_plr$apply_cross_fitting, FALSE) expect_equal(smpls, dml_plr$smpls) - } +} ) assert_resampling_pars = function(dml_obj0, dml_obj1) { @@ -143,110 +167,138 @@ assert_resampling_pars = function(dml_obj0, dml_obj1) { test_that("Unit tests for the method set_sample_splitting of class DoubleML (draw vs set)", { set.seed(3141) - dml_plr_set = DoubleMLPLR$new(dml_data, ml_g, ml_m, n_folds=7, n_rep=8) - + dml_plr_set = DoubleMLPLR$new(dml_data, ml_g, ml_m, n_folds = 7, n_rep = 8) + dml_plr_drawn = DoubleMLPLR$new(dml_data, ml_g, ml_m, - n_folds=1, n_rep=1, apply_cross_fitting=FALSE) + n_folds = 1, n_rep = 1, apply_cross_fitting = FALSE) dml_plr_set$set_sample_splitting(dml_plr_drawn$smpls) assert_resampling_pars(dml_plr_drawn, dml_plr_set) dml_plr_set$set_sample_splitting(dml_plr_drawn$smpls[[1]]) assert_resampling_pars(dml_plr_drawn, dml_plr_set) - + dml_plr_drawn = DoubleMLPLR$new(dml_data, ml_g, ml_m, - n_folds=2, n_rep=1, apply_cross_fitting=FALSE) + n_folds = 2, n_rep = 1, apply_cross_fitting = FALSE) dml_plr_set$set_sample_splitting(dml_plr_drawn$smpls) assert_resampling_pars(dml_plr_drawn, dml_plr_set) dml_plr_set$set_sample_splitting(dml_plr_drawn$smpls[[1]]) assert_resampling_pars(dml_plr_drawn, dml_plr_set) - + dml_plr_drawn = DoubleMLPLR$new(dml_data, ml_g, ml_m, - n_folds=2, n_rep=1, apply_cross_fitting=TRUE) + n_folds = 2, n_rep = 1, apply_cross_fitting = TRUE) dml_plr_set$set_sample_splitting(dml_plr_drawn$smpls) assert_resampling_pars(dml_plr_drawn, dml_plr_set) dml_plr_set$set_sample_splitting(dml_plr_drawn$smpls[[1]]) assert_resampling_pars(dml_plr_drawn, dml_plr_set) - + dml_plr_drawn = DoubleMLPLR$new(dml_data, ml_g, ml_m, - n_folds=5, n_rep=1, apply_cross_fitting=TRUE) + n_folds = 5, n_rep = 1, apply_cross_fitting = TRUE) dml_plr_set$set_sample_splitting(dml_plr_drawn$smpls) assert_resampling_pars(dml_plr_drawn, dml_plr_set) dml_plr_set$set_sample_splitting(dml_plr_drawn$smpls[[1]]) assert_resampling_pars(dml_plr_drawn, dml_plr_set) - + dml_plr_drawn = DoubleMLPLR$new(dml_data, ml_g, ml_m, - n_folds=5, n_rep=3, apply_cross_fitting=TRUE) + n_folds = 5, n_rep = 3, apply_cross_fitting = TRUE) dml_plr_set$set_sample_splitting(dml_plr_drawn$smpls) assert_resampling_pars(dml_plr_drawn, dml_plr_set) - } +} ) test_that("Unit tests for the method set_sample_splitting of class DoubleML (invalid sets)", { - smpls = list(list(train_ids = list(c(1, 2.2, 3, 4, 5), c(6, 7, 8, 9, 10)), - test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5))), - list(train_ids = list(c(1, 3, 5, 7, 9), c(2, 4, 6, 8, 10)), - test_ids = list(c(2, 4, 6, 8, 10), c(1, 3, 5, 7, 9)))) - msg = paste("Assertion on 'train_ids' failed: Must be a subset of", - "\\{'1','2','3','4','5','6','7','8','9','10'\\},", - "but is \\{'1','2.2','3','4','5'\\}.") + smpls = list( + list( + train_ids = list(c(1, 2.2, 3, 4, 5), c(6, 7, 8, 9, 10)), + test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5))), + list( + train_ids = list(c(1, 3, 5, 7, 9), c(2, 4, 6, 8, 10)), + test_ids = list(c(2, 4, 6, 8, 10), c(1, 3, 5, 7, 9)))) + msg = paste( + "Assertion on 'train_ids' failed: Must be a subset of", + 
"\\{'1','2','3','4','5','6','7','8','9','10'\\},", + "but is \\{'1','2.2','3','4','5'\\}.") expect_error(dml_plr$set_sample_splitting(smpls), - regexp = msg) - - smpls = list(list(train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), - test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5))), - list(train_ids = list(c(1, 3, 5, 7, 9), c(2, 4.5, 6, 8, 10)), - test_ids = list(c(2, 4, 6, 8, 10), c(1, 3, 5, 7, 9)))) - msg = paste("Assertion on 'train_ids' failed: Must be a subset of", - "\\{'1','2','3','4','5','6','7','8','9','10'\\},", - "but is \\{'2','4.5','6','8','10'\\}.") + regexp = msg) + + smpls = list( + list( + train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), + test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5))), + list( + train_ids = list(c(1, 3, 5, 7, 9), c(2, 4.5, 6, 8, 10)), + test_ids = list(c(2, 4, 6, 8, 10), c(1, 3, 5, 7, 9)))) + msg = paste( + "Assertion on 'train_ids' failed: Must be a subset of", + "\\{'1','2','3','4','5','6','7','8','9','10'\\},", + "but is \\{'2','4.5','6','8','10'\\}.") expect_error(dml_plr$set_sample_splitting(smpls), - regexp = msg) - - smpls = list(list(train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), - test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5))), - list(train_ids = list(c(1, 3, 4, 5, 7, 9), c(2, 4, 6, 8, 10)), - test_ids = list(c(2, 4, 6, 8, 10), c(1, 3, 5, 7, 9)))) - msg = paste("Assertion on 'smpl\\$train_ids\\[\\[i_fold\\]\\]' failed:", - "Must be disjunct from set \\{'2','4','6','8','10'\\},", - "but has \\{'4'\\}.") + regexp = msg) + + smpls = list( + list( + train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), + test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5))), + list( + train_ids = list(c(1, 3, 4, 5, 7, 9), c(2, 4, 6, 8, 10)), + test_ids = list(c(2, 4, 6, 8, 10), c(1, 3, 5, 7, 9)))) + msg = paste( + "Assertion on 'smpl\\$train_ids\\[\\[i_fold\\]\\]' failed:", + "Must be disjunct from set \\{'2','4','6','8','10'\\},", + "but has \\{'4'\\}.") expect_error(dml_plr$set_sample_splitting(smpls), - regexp = msg) - - smpls = list(list(train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), - test_ids = list(c(6, 7, 8, 8, 9, 10), c(1, 2, 3, 4, 5))), - list(train_ids = list(c(1, 3, 5, 7, 9), c(2, 4, 6, 8, 10)), - test_ids = list(c(2, 4, 6, 8, 10), c(1, 3, 5, 7, 9)))) - msg = paste("Assertion on 'test_ids' failed:", - "Contains duplicated values, position 4.") + regexp = msg) + + smpls = list( + list( + train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), + test_ids = list(c(6, 7, 8, 8, 9, 10), c(1, 2, 3, 4, 5))), + list( + train_ids = list(c(1, 3, 5, 7, 9), c(2, 4, 6, 8, 10)), + test_ids = list(c(2, 4, 6, 8, 10), c(1, 3, 5, 7, 9)))) + msg = paste( + "Assertion on 'test_ids' failed:", + "Contains duplicated values, position 4.") expect_error(dml_plr$set_sample_splitting(smpls), - regexp = msg) - - smpls = list(list(train_ids = list(c(1, 2, 3, 4, 5), c(6, 6, 7, 8, 9, 10)), - test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5))), - list(train_ids = list(c(1, 3, 5, 7, 9), c(2, 4, 6, 8, 10)), - test_ids = list(c(2, 4, 6, 8, 10), c(1, 3, 5, 7, 9)))) - msg = paste("Assertion on 'train_ids' failed:", - "Contains duplicated values, position 2.") + regexp = msg) + + smpls = list( + list( + train_ids = list(c(1, 2, 3, 4, 5), c(6, 6, 7, 8, 9, 10)), + test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5))), + list( + train_ids = list(c(1, 3, 5, 7, 9), c(2, 4, 6, 8, 10)), + test_ids = list(c(2, 4, 6, 8, 10), c(1, 3, 5, 7, 9)))) + msg = paste( + "Assertion on 'train_ids' failed:", + "Contains duplicated values, position 2.") 
expect_error(dml_plr$set_sample_splitting(smpls), - regexp = msg) - - smpls = list(list(train_ids = list(c(1, 2, 3, 4, 5, 20), c(6, 7, 8, 9, 10)), - test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5))), - list(train_ids = list(c(1, 3, 5, 7, 9), c(2, 4, 6, 8, 10)), - test_ids = list(c(2, 4, 6, 8, 10), c(1, 3, 5, 7, 9)))) - msg = paste("Assertion on 'train_ids' failed: Must be a subset of", - "\\{'1','2','3','4','5','6','7','8','9','10'\\},", - "but is \\{'1','2','3','4','5','20'\\}.") + regexp = msg) + + smpls = list( + list( + train_ids = list(c(1, 2, 3, 4, 5, 20), c(6, 7, 8, 9, 10)), + test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5))), + list( + train_ids = list(c(1, 3, 5, 7, 9), c(2, 4, 6, 8, 10)), + test_ids = list(c(2, 4, 6, 8, 10), c(1, 3, 5, 7, 9)))) + msg = paste( + "Assertion on 'train_ids' failed: Must be a subset of", + "\\{'1','2','3','4','5','6','7','8','9','10'\\},", + "but is \\{'1','2','3','4','5','20'\\}.") expect_error(dml_plr$set_sample_splitting(smpls), - regexp = msg) - - smpls = list(list(train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), - test_ids = list(c(6, -7, 8, 9, 10), c(1, 2, 3, 4, 5))), - list(train_ids = list(c(1, 3, 5, 7, 9), c(2, 4, 6, 8, 10)), - test_ids = list(c(2, 4, 6, 8, 10), c(1, 3, 5, 7, 9)))) - msg = paste("Assertion on 'test_ids' failed: Must be a subset of", - "\\{'1','2','3','4','5','6','7','8','9','10'\\},", - "but is \\{'6','-7','8','9','10'\\}.") + regexp = msg) + + smpls = list( + list( + train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)), + test_ids = list(c(6, -7, 8, 9, 10), c(1, 2, 3, 4, 5))), + list( + train_ids = list(c(1, 3, 5, 7, 9), c(2, 4, 6, 8, 10)), + test_ids = list(c(2, 4, 6, 8, 10), c(1, 3, 5, 7, 9)))) + msg = paste( + "Assertion on 'test_ids' failed: Must be a subset of", + "\\{'1','2','3','4','5','6','7','8','9','10'\\},", + "but is \\{'6','-7','8','9','10'\\}.") expect_error(dml_plr$set_sample_splitting(smpls), - regexp = msg) - } -) \ No newline at end of file + regexp = msg) +} +) From a9ad4759b036a58c4ef90554f311223d93d7a128 Mon Sep 17 00:00:00 2001 From: Philipp Bach Date: Thu, 26 Aug 2021 12:14:51 +0200 Subject: [PATCH 15/16] fix test for partial Z case without covariates, apply styler --- R/datasets.R | 51 +++---- R/double_ml_pliv.R | 134 +++++++++--------- tests/testthat/helper-03-dgp.R | 11 +- tests/testthat/helper-04-simdata.R | 9 +- tests/testthat/helper-14-dml_pliv_partial_z.R | 38 ++--- tests/testthat/test-double_ml_datasets.R | 4 +- ...uble_ml_pliv_partial_z_parameter_passing.R | 38 ++--- .../test-double_ml_pliv_partial_z_with_x.R | 39 ++--- tests/testthat/test-double_ml_pliv_tuning.R | 40 +++--- 9 files changed, 188 insertions(+), 176 deletions(-) diff --git a/R/datasets.R b/R/datasets.R index e58d25c3..6e5a348c 100644 --- a/R/datasets.R +++ b/R/datasets.R @@ -563,13 +563,13 @@ make_pliv_CHS2015 = function(n_obs, alpha = 1, dim_x = 200, dim_z = 150, #' \eqn{y_i = \beta d_i + e_i,} #' #' \eqn{d_i = z_i'\Pi + v_i,} -#' +#' #' with i.i.d. -#' +#' #' \eqn{(e_i, v_i) \sim \mathcal{N} \left(0, \left( \begin{array}{cc} #' \sigma^2_e & \sigma_{ev} \\ \sigma_{ev} & \sigma^2_v\end{array} #' \right) \right),} -#' +#' #' with \eqn{\beta} being the parameter of interests and #' \eqn{\Pi = C \cdot \left(\pi_0^0, \pi_0^1, \pi_0^2 \ldots, \pi_0^{p_z - 1} #' \right)}, instrumental variables \eqn{z_i = (z_{i1}, \ldots, z_{ip_z})} drawn @@ -582,12 +582,12 @@ make_pliv_CHS2015 = function(n_obs, alpha = 1, dim_x = 200, dim_z = 150, #' \ldots , 0 \right)}. 
The constant \eqn{C} is calibrated internally such that #' the concentration parameter \eqn{\mu^2} is set to a specific value specified #' via `mu2`. -#' +#' #' Default values are set to \eqn{\rho = 0.5}, \eqn{\sigma^2_e = 1}, #' \eqn{\sigma^2_z = 1} and \eqn{Corr(e,v) = 0.6}. For the coefficient vectors #' defaults are set such that \eqn{\beta = 1}, \eqn{\mu^2 = 30} and #' \eqn{\pi_0 = 0.7}. -#' +#' #' @references Belloni, A., Chen, D., Chernozhukov, V., and Hansen, C. (2012), #' Sparse Models and Methods for Optimal Instruments with an Application to #' Eminent Domain. Econometrica, 80 (6): 2369-2429. @@ -600,25 +600,25 @@ make_pliv_CHS2015 = function(n_obs, alpha = 1, dim_x = 200, dim_z = 150, #' #' @param dim_z (`integer(1)`) \cr #' The number of instruments. -#' +#' #' @param pi_0 (`numeric(1)`) \cr #' Coefficient vector in first-stage equation. -#' +#' #' @param s (`integer(1)`) \cr #' Sparsity index. -#' +#' #' @param mu2 (`numeric(1)`) \cr #' Value of concentration parameter used for calibration of constant \eqn{C}. -#' +#' #' @param rho (`numeric(1)`) \cr #' Coefficient determining correlation between instruments. -#' +#' #' @param sigma_z (`numeric(1)`) \cr #' Standard deviation of instruments. -#' +#' #' @param corr (`numeric(1)`) \cr #' Correlation between errors \eqn{e} and \eqn{v}. -#' +#' #' @param sigma_e (`numeric(1)`) \cr #' Standard deviation for error \eqn{e}. #' @@ -634,12 +634,13 @@ make_pliv_CHS2015 = function(n_obs, alpha = 1, dim_x = 200, dim_z = 150, #' #' @export make_pliv_BCCH2012 = function(n_obs = 100, beta = 1, dim_z = 100, pi_0 = 0.7, - s = 0, mu2 = 30, - rho = 0.5, sigma_z = 1, - corr = 0.6, sigma_e = 1, - return_type = "DoubleMLData") { + s = 0, mu2 = 30, + rho = 0.5, sigma_z = 1, + corr = 0.6, sigma_e = 1, + return_type = "DoubleMLData") { # based on https://www.econometricsociety.org/content/supplement-sparse-models-and-methods-optimal-instruments-application-eminent-domain-1 and # http://qed.econ.queensu.ca/jae/datasets/spindler001/ + assert_count(n_obs) assert_numeric(beta, len = 1) assert_count(dim_z) @@ -653,28 +654,28 @@ make_pliv_BCCH2012 = function(n_obs = 100, beta = 1, dim_z = 100, pi_0 = 0.7, assert_choice( return_type, c("data.table", "matrix", "data.frame", "DoubleMLData")) - + sigma_z = toeplitz(rho^(0:(dim_z - 1))) mu_z = rep(0, dim_z) z = rmvnorm(n = n_obs, mean = mu_z, sigma = sigma_z) - pi = pi_0^(0:(dim_z-1)) - - scale = c(sqrt(mu2/((n_obs + mu2)*pi %*% sigma_z %*% pi))) - sigma_v = sqrt(1 - (scale^2)*t(pi) %*% sigma_z %*% pi) + pi = pi_0^(0:(dim_z - 1)) + + scale = c(sqrt(mu2 / ((n_obs + mu2) * pi %*% sigma_z %*% pi))) + sigma_v = sqrt(1 - (scale^2) * t(pi) %*% sigma_z %*% pi) sev = corr * sigma_e * sigma_v - + sigma_e_v = matrix(c(sigma_e^2, sev, sev, sigma_v^2), ncol = 2) mu_e_v = rep(0, 2) e_v = rmvnorm(n = n_obs, mean = mu_e_v, sigma = sigma_e_v) e = e_v[, 1] v = e_v[, 2] - + if (s > 0) { - pi[(s+1):dim_z] = 0 + pi[(s + 1):dim_z] = 0 } d = scale * z %*% pi + v y = beta * d + e - + if (return_type == "matrix") { return(list("y" = y, "d" = d, "z" = z)) } else { diff --git a/R/double_ml_pliv.R b/R/double_ml_pliv.R index 219b4849..aa5307ce 100644 --- a/R/double_ml_pliv.R +++ b/R/double_ml_pliv.R @@ -448,61 +448,63 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV", # nuisance r d = self$data$data_model[[self$data$treat_col]] y = self$data$data_model[[self$data$y_col]] - + if (test_character(self$data$x_cols, len = 0)) { r_hat = dml_cv_predict(self$learner$ml_r, - c( - self$data$x_cols, - self$data$other_treat_cols, - self$data$z_cols), - 
self$data$treat_col, - self$data$data_model, - nuisance_id = "nuis_r", - smpls = smpls, - est_params = self$get_params("ml_r"), - return_train_preds = FALSE, - learner_class = private$learner_class$ml_r, - fold_specific_params = - private$fold_specific_params) + c( + self$data$x_cols, + self$data$other_treat_cols, + self$data$z_cols), + self$data$treat_col, + self$data$data_model, + nuisance_id = "nuis_r", + smpls = smpls, + est_params = self$get_params("ml_r"), + return_train_preds = FALSE, + learner_class = private$learner_class$ml_r, + fold_specific_params = + private$fold_specific_params) } else { # Partial out Xs from y and d by using linear regression task_part_y = initiate_task("lm_part_out_y", self$data$data_model, - target = self$data$y_col, - select_cols = c(self$data$x_cols, - self$data$other_treat_cols), - "LearnerRegr") + target = self$data$y_col, + select_cols = c( + self$data$x_cols, + self$data$other_treat_cols), + "LearnerRegr") learner_lm = LearnerRegrLM$new() resampling_part_y = rsmp("insample")$instantiate(task_part_y) r_part_y = resample(task_part_y, learner_lm, resampling_part_y, - store_models = TRUE) + store_models = TRUE) y_tilde = y - as.data.table(r_part_y$prediction())$response - + task_part_d = initiate_task("lm_part_out_d", self$data$data_model, - target = self$data$treat_col, - select_cols = c(self$data$x_cols, - self$data$other_treat_cols), - "LearnerRegr") + target = self$data$treat_col, + select_cols = c( + self$data$x_cols, + self$data$other_treat_cols), + "LearnerRegr") resampling_part_d = rsmp("insample")$instantiate(task_part_d) r_part_d = resample(task_part_d, learner_lm, resampling_part_d, - store_models = TRUE) + store_models = TRUE) d_tilde = d - as.data.table(r_part_d$prediction())$response data_aux = data.table(self$data$data_model, "d_tilde" = d_tilde) r_hat = dml_cv_predict(self$learner$ml_r, - c( - self$data$x_cols, - self$data$other_treat_cols, - self$data$z_cols), - "d_tilde", - data_aux, - nuisance_id = "nuis_r", - smpls = smpls, - est_params = self$get_params("ml_r"), - return_train_preds = FALSE, - learner_class = private$learner_class$ml_r, - fold_specific_params = - private$fold_specific_params) - } + c( + self$data$x_cols, + self$data$other_treat_cols, + self$data$z_cols), + "d_tilde", + data_aux, + nuisance_id = "nuis_r", + smpls = smpls, + est_params = self$get_params("ml_r"), + return_train_preds = FALSE, + learner_class = private$learner_class$ml_r, + fold_specific_params = + private$fold_specific_params) + } if (is.character(self$score)) { if (self$score == "partialling out") { if (test_character(self$data$x_cols, len = 0)) { @@ -701,7 +703,6 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV", }, ml_nuisance_tuning_partialZ = function(smpls, param_set, tune_settings, tune_on_folds, ...) 
{ - if (test_character(self$data$x_cols, len = 0)) { if (!tune_on_folds) { data_tune_list = list(self$data$data_model) @@ -711,30 +712,31 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV", function(x) extract_training_data(self$data$data_model, x)) } tuning_result_r = dml_tune(self$learner$ml_r, - c( - self$data$x_cols, - self$data$other_treat_cols, - self$data$z_cols), - self$data$treat_col, data_tune_list, - nuisance_id = "nuis_r", - param_set$ml_r, tune_settings, - tune_settings$measure$ml_r, - private$learner_class$ml_r) + c( + self$data$x_cols, + self$data$other_treat_cols, + self$data$z_cols), + self$data$treat_col, data_tune_list, + nuisance_id = "nuis_r", + param_set$ml_r, tune_settings, + tune_settings$measure$ml_r, + private$learner_class$ml_r) } else { # Partial out Xs from d by using linear regression task_part_d = initiate_task("lm_part_out_d", self$data$data_model, - target = self$data$treat_col, - select_cols = c(self$data$x_cols, - self$data$other_treat_cols), - "LearnerRegr") + target = self$data$treat_col, + select_cols = c( + self$data$x_cols, + self$data$other_treat_cols), + "LearnerRegr") resampling_part_d = rsmp("insample")$instantiate(task_part_d) learner_lm = LearnerRegrLM$new() r_part_d = resample(task_part_d, learner_lm, resampling_part_d, - store_models = TRUE) - d_tilde = self$data$data_model[[self$data$treat_col]] - + store_models = TRUE) + d_tilde = self$data$data_model[[self$data$treat_col]] - as.data.table(r_part_d$prediction())$response data_aux = data.table(self$data$data_model, "d_tilde" = d_tilde) - + if (!tune_on_folds) { data_tune_list = list(data_aux) } else { @@ -742,17 +744,17 @@ DoubleMLPLIV = R6Class("DoubleMLPLIV", smpls$train_ids, function(x) extract_training_data(data_aux, x)) } - + tuning_result_r = dml_tune(self$learner$ml_r, - c( - self$data$x_cols, - self$data$other_treat_cols, - self$data$z_cols), - "d_tilde", data_tune_list, - nuisance_id = "nuis_r", - param_set$ml_r, tune_settings, - tune_settings$measure$ml_r, - private$learner_class$ml_r) + c( + self$data$x_cols, + self$data$other_treat_cols, + self$data$z_cols), + "d_tilde", data_tune_list, + nuisance_id = "nuis_r", + param_set$ml_r, tune_settings, + tune_settings$measure$ml_r, + private$learner_class$ml_r) } tuning_result = list("ml_r" = list(tuning_result_r, diff --git a/tests/testthat/helper-03-dgp.R b/tests/testthat/helper-03-dgp.R index 53017363..d7ce84b5 100644 --- a/tests/testthat/helper-03-dgp.R +++ b/tests/testthat/helper-03-dgp.R @@ -133,7 +133,8 @@ dgp1_toeplitz = function(n, p, betamax = 4, decay = 0.99, threshold = 0, noiseva return(data) } -make_data_pliv_partialZ = function(n_obs, alpha = 1, dim_x = 5, dim_z = 150) { +make_data_pliv_partialZ = function(n_obs, alpha = 1, dim_x = 5, dim_z = 150, + return_x_vars = TRUE) { sigma_e_u = matrix(c(1, 0.6, 0.6, 1), ncol = 2) mu_e_u = rep(0, 2) @@ -161,12 +162,16 @@ make_data_pliv_partialZ = function(n_obs, alpha = 1, dim_x = 5, dim_z = 150) { d = x %*% gamma + z %*% delta + u y = alpha * d + x %*% beta + epsilon - colnames(x) = paste0("X", 1:dim_x) colnames(z) = paste0("Z", 1:dim_z) colnames(y) = "y" colnames(d) = "d" - data = data.frame(x, y, d, z) + if (return_x_vars) { + data = data.frame(x, y, d, z) + } else { + data = data.frame(y, d, z) + } + return(data) } diff --git a/tests/testthat/helper-04-simdata.R b/tests/testthat/helper-04-simdata.R index a53449ae..4d79a0c5 100644 --- a/tests/testthat/helper-04-simdata.R +++ b/tests/testthat/helper-04-simdata.R @@ -122,7 +122,8 @@ df = make_data_pliv_partialZ( return_x_vars = TRUE) Xnames = 
names(df)[names(df) %in% c("y", "d", paste0("Z", 1:dim_z)) == FALSE] dml_data = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = Xnames, z_cols = paste0("Z", 1:dim_z)) -data_pliv_partialZ_with_X = list(df = df, - dml_data = dml_data) \ No newline at end of file + y_col = "y", + d_cols = "d", x_cols = Xnames, z_cols = paste0("Z", 1:dim_z)) +data_pliv_partialZ_with_X = list( + df = df, + dml_data = dml_data) diff --git a/tests/testthat/helper-14-dml_pliv_partial_z.R b/tests/testthat/helper-14-dml_pliv_partial_z.R index 09b26f75..bbde349b 100644 --- a/tests/testthat/helper-14-dml_pliv_partial_z.R +++ b/tests/testthat/helper-14-dml_pliv_partial_z.R @@ -11,9 +11,9 @@ dml_pliv_partial_z = function(data, y, d, z, all_thetas = all_ses = rep(NA_real_, n_rep) all_preds = list() - + # check whether data contains Xs - x_indx = names(data)[! (names(data) %in% c(y,d,z))] + x_indx = names(data)[!(names(data) %in% c(y, d, z))] if (length(x_indx) != 0) { formula_rhs = paste0(x_indx, collapse = " + ") lm_y = lm(paste0("y ~ ", formula_rhs), data) @@ -22,7 +22,7 @@ dml_pliv_partial_z = function(data, y, d, z, lm_d = lm(paste0("d ~ ", formula_rhs), data) data$d_tilde = data[, d] - predict(lm_d) } - + for (i_rep in 1:n_rep) { this_smpl = smpls[[i_rep]] @@ -37,15 +37,15 @@ dml_pliv_partial_z = function(data, y, d, z, this_smpl, all_preds[[i_rep]]) r_hat = residuals$r_hat - - if (all(! (names(data) %in% c("y_tilde", "d_tilde")))) { + + if (all(!(names(data) %in% c("y_tilde", "d_tilde")))) { D = data[, d] Y = data[, y] } else { D = data[, "d_tilde"] Y = data[, "y_tilde"] } - + # DML 1 if (dml_procedure == "dml1") { thetas = vars = rep(NA_real_, n_folds) @@ -92,7 +92,7 @@ dml_pliv_partial_z = function(data, y, d, z, res = list( coef = theta, se = se, t = t, pval = pval, thetas = all_thetas, ses = all_ses, - all_preds=all_preds, smpls=smpls, + all_preds = all_preds, smpls = smpls, data_with_res = data) return(res) @@ -105,10 +105,10 @@ fit_nuisance_pliv_partial_z = function(data, y, d, z, train_ids = smpls$train_ids test_ids = smpls$test_ids - - if (all(! (names(data) %in% c("y_tilde", "d_tilde")))) { + + if (all(!(names(data) %in% c("y_tilde", "d_tilde")))) { # case without Xs - + # nuisance r: E[D|X] r_indx = names(data) != y data_r = data[, r_indx, drop = FALSE] @@ -116,27 +116,27 @@ fit_nuisance_pliv_partial_z = function(data, y, d, z, if (!is.null(params_r)) { ml_r$param_set$values = params_r } - + resampling_r = mlr3::rsmp("custom") resampling_r$instantiate(task_r, train_ids, test_ids) - + r_r = mlr3::resample(task_r, ml_r, resampling_r, store_models = TRUE) r_hat_list = lapply(r_r$predictions(), function(x) x$response) } else { - r_indx = ! (names(data) %in% c(y, d, "y_tilde")) + r_indx = !(names(data) %in% c(y, d, "y_tilde")) data_r = data[, r_indx, drop = FALSE] task_r = mlr3::TaskRegr$new(id = paste0("nuis_r_", d), backend = data_r, target = "d_tilde") if (!is.null(params_r)) { ml_r$param_set$values = params_r } - + resampling_r = mlr3::rsmp("custom") resampling_r$instantiate(task_r, train_ids, test_ids) - + r_r = mlr3::resample(task_r, ml_r, resampling_r, store_models = TRUE) r_hat_list = lapply(r_r$predictions(), function(x) x$response) } - + all_preds = list( r_hat_list = r_hat_list) return(all_preds) @@ -187,15 +187,15 @@ bootstrap_pliv_partial_z = function(theta, se, data, y, d, z, n_folds, smpls, smpls[[i_rep]], all_preds[[i_rep]]) r_hat = residuals$r_hat - - if (all(! 
(names(data) %in% c("y_tilde", "d_tilde")))) { + + if (all(!(names(data) %in% c("y_tilde", "d_tilde")))) { D = data[, d] Y = data[, y] } else { D = data[, "d_tilde"] Y = data[, "y_tilde"] } - + psi = (Y - D * theta[i_rep]) * r_hat psi_a = -r_hat * D diff --git a/tests/testthat/test-double_ml_datasets.R b/tests/testthat/test-double_ml_datasets.R index 008f5ae9..c1e6ec39 100644 --- a/tests/testthat/test-double_ml_datasets.R +++ b/tests/testthat/test-double_ml_datasets.R @@ -39,8 +39,8 @@ patrick::with_parameters_test_that("Unit tests for datasets functionalities:", expect_is(df$d, "matrix") expect_is(df$z, "matrix") } - - # Test BCCH2012 + + # Test BCCH2012 if (return_type != "matrix") { df = make_pliv_BCCH2012(return_type = return_type) expect_is(df, paste0(return_type)) diff --git a/tests/testthat/test-double_ml_pliv_partial_z_parameter_passing.R b/tests/testthat/test-double_ml_pliv_partial_z_parameter_passing.R index 7c2941cb..63604016 100644 --- a/tests/testthat/test-double_ml_pliv_partial_z_parameter_passing.R +++ b/tests/testthat/test-double_ml_pliv_partial_z_parameter_passing.R @@ -29,12 +29,12 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par learner_pars = get_default_mlmethod_pliv(learner) df = data_pliv$df - - if (! with_x) { + + if (!with_x) { x_indx = grep("X", names(df)) - df = df[, - x_indx, drop = FALSE] + df = df[, -x_indx, drop = FALSE] } - + set.seed(3141) pliv_hat = dml_pliv_partial_z(df, y = "y", d = "d", z = c("z", "z2"), @@ -44,22 +44,22 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par dml_procedure = dml_procedure, score = score) theta = pliv_hat$coef se = pliv_hat$se - - if (! with_x) { + + if (!with_x) { df_boot = df } else { df_boot = pliv_hat$data_with_res } - + set.seed(3141) boot_theta = bootstrap_pliv_partial_z(pliv_hat$thetas, pliv_hat$ses, - df_boot, - y = "y", d = "d", z = c("z", "z2"), - n_folds = n_folds, n_rep = n_rep, - smpls = pliv_hat$smpls, - all_preds= pliv_hat$all_preds, - bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef - + df_boot, + y = "y", d = "d", z = c("z", "z2"), + n_folds = n_folds, n_rep = n_rep, + smpls = pliv_hat$smpls, + all_preds = pliv_hat$all_preds, + bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef + set.seed(3141) Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] @@ -85,7 +85,7 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par # bootstrap set.seed(3141) - dml_pliv_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + dml_pliv_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = dml_pliv_obj$boot_coef expect_equal(theta, theta_obj, tolerance = 1e-8) @@ -101,12 +101,12 @@ patrick::with_parameters_test_that("Unit tests for parameter passing of PLIV.par learner_pars = get_default_mlmethod_pliv(learner) df = data_pliv$df - - if (! 
with_x) { + + if (!with_x) { x_indx = grep("X", names(df)) - df = df[, - x_indx, drop = FALSE] + df = df[, -x_indx, drop = FALSE] } - + # Passing for non-cross-fitting case set.seed(3141) my_task = Task$new("help task", "regr", data_pliv$df) diff --git a/tests/testthat/test-double_ml_pliv_partial_z_with_x.R b/tests/testthat/test-double_ml_pliv_partial_z_with_x.R index 1e157b95..2b2e23f3 100644 --- a/tests/testthat/test-double_ml_pliv_partial_z_with_x.R +++ b/tests/testthat/test-double_ml_pliv_partial_z_with_x.R @@ -25,24 +25,24 @@ patrick::with_parameters_test_that("Unit tests for PLIV.partialZ:", dml_procedure = dml_procedure, score = score) theta = pliv_hat$coef se = pliv_hat$se - + # data with residuals data_with_res = pliv_hat$data_with_res - + set.seed(3141) boot_theta = bootstrap_pliv_partial_z(pliv_hat$thetas, pliv_hat$ses, - data_with_res, - y = "y", d = "d", z = paste0("Z", 1:dim_z), - n_folds = 5, smpls = pliv_hat$smpls, - all_preds= pliv_hat$all_preds, - bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef - + data_with_res, + y = "y", d = "d", z = paste0("Z", 1:dim_z), + n_folds = 5, smpls = pliv_hat$smpls, + all_preds = pliv_hat$all_preds, + bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef + set.seed(3141) double_mlpliv_obj = DoubleMLPLIV.partialZ(data_pliv_partialZ_with_X$dml_data, - ml_r = learner_pars$ml_r$clone(), - n_folds = 5, - score = score, - dml_procedure = dml_procedure) + ml_r = learner_pars$ml_r$clone(), + n_folds = 5, + score = score, + dml_procedure = dml_procedure) double_mlpliv_obj$fit() theta_obj = double_mlpliv_obj$coef @@ -50,7 +50,7 @@ patrick::with_parameters_test_that("Unit tests for PLIV.partialZ:", # bootstrap set.seed(3141) - double_mlpliv_obj$bootstrap(method = 'normal', n_rep = n_rep_boot) + double_mlpliv_obj$bootstrap(method = "normal", n_rep = n_rep_boot) boot_theta_obj = double_mlpliv_obj$boot_coef # at the moment the object result comes without a name @@ -61,13 +61,16 @@ patrick::with_parameters_test_that("Unit tests for PLIV.partialZ:", ) test_that("Unit tests for PLIV.partialZ invalid score", { - msg = paste("Callable score not implemented for DoubleMLPLIV with", - "partialX=FALSE and partialZ=TRUE.") + msg = paste( + "Callable score not implemented for DoubleMLPLIV with", + "partialX=FALSE and partialZ=TRUE.") double_mlplr_obj <- DoubleMLPLIV.partialZ( data_pliv_partialZ$dml_data, - ml_r = mlr3::lrn('regr.rpart'), - score = function(x) return(mean(x))) + ml_r = mlr3::lrn("regr.rpart"), + score = function(x) { + return(mean(x)) + }) expect_error(double_mlplr_obj$fit(), - regexp = msg) + regexp = msg) } ) diff --git a/tests/testthat/test-double_ml_pliv_tuning.R b/tests/testthat/test-double_ml_pliv_tuning.R index 83655158..fc3e3f04 100644 --- a/tests/testthat/test-double_ml_pliv_tuning.R +++ b/tests/testthat/test-double_ml_pliv_tuning.R @@ -69,7 +69,7 @@ patrick::with_parameters_test_that("Unit tests for tuning of PLIV", z_cols = z_vars[[z_indx]] set.seed(3141) df = data_pliv$df - + Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] data_ml = double_ml_data_from_data_frame(df, y_col = "y", @@ -127,18 +127,18 @@ patrick::with_parameters_test_that("Unit tests for tuning of PLIV", # } # if (data_ml$n_instr > 1) { - + # Case without X's set.seed(3141) data_ml_noX = double_ml_data_from_data_frame(df, - y_col = "y", - d_cols = "d", x_cols = character(0), z_cols = z_cols) + y_col = "y", + d_cols = "d", x_cols = character(0), z_cols = z_cols) double_mlpliv_obj_tuned_Z_noX = DoubleMLPLIV.partialZ(data_ml_noX, - n_folds = 
n_folds, - ml_r = learner, - dml_procedure = dml_procedure, - score = score, - n_rep = n_rep) + n_folds = n_folds, + ml_r = learner, + dml_procedure = dml_procedure, + score = score, + n_rep = n_rep) param_grid_r = list("ml_r" = param_grid[["ml_r"]]) tune_settings_r = tune_settings tune_settings_r$measure$ml_g = tune_settings_r$measure$ml_m = NULL @@ -146,22 +146,22 @@ patrick::with_parameters_test_that("Unit tests for tuning of PLIV", param_set = param_grid_r, tune_on_folds = tune_on_folds, tune_settings = tune_settings_r) double_mlpliv_obj_tuned_Z_noX$fit() - + theta_obj_tuned_Z_noX = double_mlpliv_obj_tuned_Z_noX$coef se_obj_tuned_Z_noX = double_mlpliv_obj_tuned_Z_noX$se - + expect_is(theta_obj_tuned_Z_noX, "numeric") expect_is(se_obj_tuned_Z_noX, "numeric") - + # Case with X's set.seed(3141) double_mlpliv_obj_tuned_Z = DoubleMLPLIV.partialZ(data_ml, - n_folds = n_folds, - ml_r = learner, - dml_procedure = dml_procedure, - score = score, - n_rep = n_rep) - + n_folds = n_folds, + ml_r = learner, + dml_procedure = dml_procedure, + score = score, + n_rep = n_rep) + param_grid_r = list("ml_r" = param_grid[["ml_r"]]) tune_settings_r = tune_settings tune_settings_r$measure$ml_g = tune_settings_r$measure$ml_m = NULL @@ -169,10 +169,10 @@ patrick::with_parameters_test_that("Unit tests for tuning of PLIV", param_set = param_grid_r, tune_on_folds = tune_on_folds, tune_settings = tune_settings_r) double_mlpliv_obj_tuned_Z$fit() - + theta_obj_tuned_Z = double_mlpliv_obj_tuned_Z$coef se_obj_tuned_Z = double_mlpliv_obj_tuned_Z$se - + expect_is(theta_obj_tuned_Z, "numeric") expect_is(se_obj_tuned_Z, "numeric") From 823116b07bb793696d8f43bcab621fb2548f1cf7 Mon Sep 17 00:00:00 2001 From: Philipp Bach Date: Thu, 26 Aug 2021 12:53:41 +0200 Subject: [PATCH 16/16] fix doubleml data test --- tests/testthat/test-double_ml_data.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-double_ml_data.R b/tests/testthat/test-double_ml_data.R index c1362252..d4949df1 100644 --- a/tests/testthat/test-double_ml_data.R +++ b/tests/testthat/test-double_ml_data.R @@ -254,7 +254,7 @@ test_that("Unit tests for DoubleMLData", { D10_1d_setd = D10_1d$clone()$set_data_model("d2") - msg1 = "At least one variable/column is set as treatment variable \\(`d_cols`\\) and as a covariate \\(`x_cols`\\). Consider using parameter 'use_other_treat_as_covariate'." + msg1 = "At least one variable/column is set as treatment variable \\('d_cols'\\) and as a covariate \\('x_cols'\\). Consider using parameter 'use_other_treat_as_covariate'." expect_error(double_ml_data_from_data_frame(data, x_cols = X_cols1, @@ -271,7 +271,7 @@ test_that("Unit tests for DoubleMLData", { z_cols = c(z_indx, X_cols1[1])), regexp = msg2) - msg3 = "y cannot be set as outcome variable `y_col` and covariate in 'x_cols'." + msg3 = "y cannot be set as outcome variable 'y_col' and covariate in 'x_cols'." expect_error(double_ml_data_from_data_frame(data, x_cols = c(y_indx, X_cols1), y_col = y_indx, @@ -325,7 +325,7 @@ test_that("Unit tests for DoubleMLData", { expect_identical(D9$data_model, D9_noXcols$data_model) # Exception handling - msg8 = "At least one variable/column is set as treatment variable \\(`d_cols`\\) and as a covariate \\(`x_cols`\\). Consider using parameter 'use_other_treat_as_covariate'." + msg8 = "At least one variable/column is set as treatment variable \\('d_cols'\\) and as a covariate \\('x_cols'\\). Consider using parameter 'use_other_treat_as_covariate'." 
expect_error(DoubleMLData$new(data, x_cols = X_cols1, y_col = y_indx, @@ -341,7 +341,7 @@ test_that("Unit tests for DoubleMLData", { z_cols = c(z_indx, X_cols1[1])), regexp = msg9) - msg10 = "y cannot be set as outcome variable `y_col` and covariate in 'x_cols'." + msg10 = "y cannot be set as outcome variable 'y_col' and covariate in 'x_cols'." expect_error(DoubleMLData$new(data, x_cols = c(y_indx, X_cols1), y_col = y_indx,
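
Usage sketch (editorial addition, not part of the patch series): the calibrated BCCH2012 generator introduced in PATCH 15/16, assuming a build of the DoubleML R package that includes these patches (the generator also relies on mvtnorm and data.table). The seed, sparsity index and dimensions below are illustrative.

library(DoubleML)

set.seed(3141)
# Sparse first stage: only the first 5 coefficients of Pi are non-zero; the
# constant C is calibrated internally so that the concentration parameter
# mu^2 equals 30.
dml_data = make_pliv_BCCH2012(
  n_obs = 100, beta = 1, dim_z = 100,
  pi_0 = 0.7, s = 5, mu2 = 30,
  return_type = "DoubleMLData")
print(dml_data)

# return_type = "matrix" returns the raw y, d and z for manual checks.
raw = make_pliv_BCCH2012(n_obs = 100, dim_z = 100, s = 5,
  return_type = "matrix")
str(raw)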
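The partial-Z fix in PATCH 15/16 is easiest to see end to end. A sketch under the same assumptions (patched build; the rpart learner and fold settings are illustrative): an empty x_cols is exactly the case the new test_character(self$data$x_cols, len = 0) branch in double_ml_pliv.R handles, regressing d directly on the instruments instead of first partialling out covariates by linear regression.

library(DoubleML)
library(mlr3)

set.seed(3141)
df = make_pliv_BCCH2012(n_obs = 100, dim_z = 10, s = 3,
  return_type = "data.frame")
# No covariates: pass x_cols = character(0) when building the data backend,
# mirroring the "without covariates" test case above.
dml_data_noX = double_ml_data_from_data_frame(df,
  y_col = "y", d_cols = "d",
  x_cols = character(0), z_cols = paste0("Z", 1:10))

dml_pliv = DoubleMLPLIV.partialZ(dml_data_noX,
  ml_r = lrn("regr.rpart"),
  n_folds = 5, score = "partialling out")
dml_pliv$fit()
c(coef = dml_pliv$coef, se = dml_pliv$se)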
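Likewise, a compact sketch of the set_sample_splitting() behaviour that the reworked tests pin down (patched build assumed; the ranger learner mirrors the test setup). The object deduces n_folds, n_rep and apply_cross_fitting from the shape of the externally provided partition.

library(DoubleML)
library(mlr3)
library(mlr3learners)

set.seed(3141)
dml_data = make_plr_CCDDHNR2018(n_obs = 10)
ml_g = lrn("regr.ranger")
dml_plr = DoubleMLPLR$new(dml_data, ml_g, ml_g$clone())

# One repetition of two-fold cross-fitting, provided externally as a nested
# list of train and test indices.
smpls = list(list(
  train_ids = list(c(1, 2, 3, 4, 5), c(6, 7, 8, 9, 10)),
  test_ids = list(c(6, 7, 8, 9, 10), c(1, 2, 3, 4, 5))))
dml_plr$set_sample_splitting(smpls)
stopifnot(dml_plr$n_folds == 2, dml_plr$n_rep == 1,
  dml_plr$apply_cross_fitting)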