diff --git a/.Rbuildignore b/.Rbuildignore index 8d35a0a..41b889c 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,3 +4,4 @@ README deltaSD.png ^LICENSE\.md$ .github +vignettes/precompute.R diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 428757c..639ddc9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -15,8 +15,8 @@ jobs: strategy: matrix: config: - - {os: macos-latest, r: 'release'} - {os: ubuntu-latest, r: 'release'} + - {os: macos-latest, r: 'release'} steps: - name: Checkout code @@ -53,6 +53,8 @@ jobs: uses: r-lib/actions/setup-tinytex@v2 - name: Check build + env: + HONESTDID_RUN_TESTS: '1' run: | devtools::document() devtools::check() diff --git a/R/arp-nuisance.R b/R/arp-nuisance.R index 9b6e062..782ec5c 100644 --- a/R/arp-nuisance.R +++ b/R/arp-nuisance.R @@ -534,16 +534,19 @@ lpDualSoln = .lp_dual_fn(y_T = y_T_ARP, X_T = X_T_ARP, eta = linSoln$eta_star, gamma_tilde = linSoln$lambda, sigma = sigma_ARP) - sigma_B_dual = base::sqrt( base::t(lpDualSoln$gamma_tilde) %*% sigma_ARP %*% lpDualSoln$gamma_tilde) + sigma_B_dual2 = base::t(lpDualSoln$gamma_tilde) %*% sigma_ARP %*% lpDualSoln$gamma_tilde #If sigma_B_dual is 0 to numerical precision, reject iff eta > 0 - if(sigma_B_dual < 10^(-10)){ + if ( base::abs(sigma_B_dual2) < .Machine$double.eps ) { base::return(base::list(reject = base::ifelse(linSoln$eta_star > 0, 1, 0), eta = linSoln$eta_star, delta = linSoln$delta_star, lambda = linSoln$lambda)) + } else if ( sigma_B_dual2 < 0 ) { + base::stop(".vlo_vup_dual_fn returned a negative variance") } + sigma_B_dual = base::sqrt(sigma_B_dual2) maxstat = lpDualSoln$eta/sigma_B_dual # HYBRID: Modify vlo, vup for the hybrid test diff --git a/R/honest_sunab.R b/R/honest_sunab.R index 4d82fce..e02517e 100644 --- a/R/honest_sunab.R +++ b/R/honest_sunab.R @@ -1,36 +1,38 @@ +#' @title sunab_beta_vcv +#' #' @description #' This function takes a regression estimated using fixest with the sunab option #' and extracts the aggregated event-study coefficients and their variance-covariance matrix +#' #' @param sunab_fixest The result of a fixest call using the sunab option +#' #' @returns A list containing beta (the event-study coefficients), #' sigma (the variance-covariance matrix), and #' cohorts (the relative times corresponding to beta, sigma) - -sunab_beta_vcv <- -function(sunab_fixest){ +sunab_beta_vcv <- function(sunab_fixest) { ## The following code block extracts the weights on individual coefs used in # the fixest aggregation ## sunab_agg <- sunab_fixest$model_matrix_info$sunab$agg_period - sunab_names <- names(sunab_fixest$coefficients) - sunab_sel <- grepl(sunab_agg, sunab_names, perl=TRUE) + sunab_names <- base::names(sunab_fixest$coefficients) + sunab_sel <- base::grepl(sunab_agg, sunab_names, perl=TRUE) sunab_names <- sunab_names[sunab_sel] - if(!is.null(sunab_fixest$weights)){ - sunab_wgt <- colSums(sunab_fixest$weights * sign(model.matrix(sunab_fixest)[, sunab_names, drop=FALSE])) + if(!base::is.null(sunab_fixest$weights)){ + sunab_wgt <- base::colSums(sunab_fixest$weights * base::sign(stats::model.matrix(sunab_fixest)[, sunab_names, drop=FALSE])) } else { - sunab_wgt <- colSums(sign(model.matrix(sunab_fixest)[, sunab_names, drop=FALSE])) + sunab_wgt <- base::colSums(base::sign(stats::model.matrix(sunab_fixest)[, sunab_names, drop=FALSE])) } #Construct matrix sunab_trans such that sunab_trans %*% non-aggregated coefs = aggregated coefs, - sunab_cohorts <- as.numeric(gsub(paste0(".*", sunab_agg, ".*"), "\\2", sunab_names, 
perl=TRUE)) - sunab_mat <- model.matrix(~ 0 + factor(sunab_cohorts)) - sunab_trans <- solve(t(sunab_mat) %*% (sunab_wgt * sunab_mat)) %*% t(sunab_wgt * sunab_mat) + sunab_cohorts <- base::as.numeric(base::gsub(base::paste0(".*", sunab_agg, ".*"), "\\2", sunab_names, perl=TRUE)) + sunab_mat <- stats::model.matrix(~ 0 + base::factor(sunab_cohorts)) + sunab_trans <- base::solve(base::t(sunab_mat) %*% (sunab_wgt * sunab_mat)) %*% base::t(sunab_wgt * sunab_mat) #Get the coefs and vcv - sunab_coefs <- sunab_trans %*% cbind(sunab_fixest$coefficients[sunab_sel]) - sunab_vcov <- sunab_trans %*% sunab_fixest$cov.scaled[sunab_sel, sunab_sel] %*% t(sunab_trans) + sunab_coefs <- sunab_trans %*% base::cbind(sunab_fixest$coefficients[sunab_sel]) + sunab_vcov <- sunab_trans %*% sunab_fixest$cov.scaled[sunab_sel, sunab_sel] %*% base::t(sunab_trans) - return(list(beta = sunab_coefs, - sigma = sunab_vcov, - cohorts = sort(unique(sunab_cohorts)))) + base::return(base::list(beta = sunab_coefs, + sigma = sunab_vcov, + cohorts = base::sort(base::unique(sunab_cohorts)))) } diff --git a/README.md b/README.md index 41a32de..dea8bfe 100644 --- a/README.md +++ b/README.md @@ -383,27 +383,27 @@ function(sunab_fixest){ ## The following code block extracts the weights on individual coefs used in # the fixest aggregation ## sunab_agg <- sunab_fixest$model_matrix_info$sunab$agg_period - sunab_names <- names(sunab_fixest$coefficients) - sunab_sel <- grepl(sunab_agg, sunab_names, perl=TRUE) + sunab_names <- base::names(sunab_fixest$coefficients) + sunab_sel <- base::grepl(sunab_agg, sunab_names, perl=TRUE) sunab_names <- sunab_names[sunab_sel] - if(!is.null(sunab_fixest$weights)){ - sunab_wgt <- colSums(sunab_fixest$weights * sign(model.matrix(sunab_fixest)[, sunab_names, drop=FALSE])) + if(!base::is.null(sunab_fixest$weights)){ + sunab_wgt <- base::colSums(sunab_fixest$weights * base::sign(stats::model.matrix(sunab_fixest)[, sunab_names, drop=FALSE])) } else { - sunab_wgt <- colSums(sign(model.matrix(sunab_fixest)[, sunab_names, drop=FALSE])) + sunab_wgt <- base::colSums(base::sign(stats::model.matrix(sunab_fixest)[, sunab_names, drop=FALSE])) } #Construct matrix sunab_trans such that sunab_trans %*% non-aggregated coefs = aggregated coefs, - sunab_cohorts <- as.numeric(gsub(paste0(".*", sunab_agg, ".*"), "\\2", sunab_names, perl=TRUE)) - sunab_mat <- model.matrix(~ 0 + factor(sunab_cohorts)) - sunab_trans <- solve(t(sunab_mat) %*% (sunab_wgt * sunab_mat)) %*% t(sunab_wgt * sunab_mat) + sunab_cohorts <- base::as.numeric(base::gsub(base::paste0(".*", sunab_agg, ".*"), "\\2", sunab_names, perl=TRUE)) + sunab_mat <- stats::model.matrix(~ 0 + base::factor(sunab_cohorts)) + sunab_trans <- base::solve(base::t(sunab_mat) %*% (sunab_wgt * sunab_mat)) %*% base::t(sunab_wgt * sunab_mat) #Get the coefs and vcv - sunab_coefs <- sunab_trans %*% cbind(sunab_fixest$coefficients[sunab_sel]) - sunab_vcov <- sunab_trans %*% sunab_fixest$cov.scaled[sunab_sel, sunab_sel] %*% t(sunab_trans) + sunab_coefs <- sunab_trans %*% base::cbind(sunab_fixest$coefficients[sunab_sel]) + sunab_vcov <- sunab_trans %*% sunab_fixest$cov.scaled[sunab_sel, sunab_sel] %*% base::t(sunab_trans) - return(list(beta = sunab_coefs, - sigma = sunab_vcov, - cohorts = sort(unique(sunab_cohorts)))) + base::return(base::list(beta = sunab_coefs, + sigma = sunab_vcov, + cohorts = base::sort(base::unique(sunab_cohorts)))) } ``` @@ -475,8 +475,6 @@ honest_did <- function(...) UseMethod("honest_did") #' points for computational reasons. #' @param ... 
Parameters to pass to `createSensitivityResults` or #' `createSensitivityResults_relativeMagnitudes`. -#' @inheritParams HonestDiD::createSensitivityResults -#' @inheritParams HonestDid::createSensitivityResults_relativeMagnitudes honest_did.AGGTEobj <- function(es, e = 0, type = c("smoothness", "relative_magnitude"), diff --git a/data/VignetteResults.rda b/data/VignetteResults.rda new file mode 100644 index 0000000..07a51c2 Binary files /dev/null and b/data/VignetteResults.rda differ diff --git a/man/VignetteResults.Rd b/man/VignetteResults.Rd new file mode 100644 index 0000000..fcd5940 --- /dev/null +++ b/man/VignetteResults.Rd @@ -0,0 +1,18 @@ +\docType{data} +\name{VignetteResults} +\alias{VignetteResults} +\title{ + Pre-computed results to use in the Vignette. +} +\description{ + This list contains pre-computed, time-intensive results used in the vignette. +} +\format{ + A list containing the 5 results shown in the vignette; refer to the text for details. \describe{ + \item{BC_DeltaSDNB_RobustResults}{} + \item{BC_DeltaSDRM_RobustResults}{} + \item{BC_OriginalResults}{} + \item{LW_DeltaSD_RobustResults}{} + \item{LW_DeltaSDD_RobustResults}{} + } +} diff --git a/man/createEventStudyPlot.Rd b/man/createEventStudyPlot.Rd index 0fcdf83..8b472cf 100644 --- a/man/createEventStudyPlot.Rd +++ b/man/createEventStudyPlot.Rd @@ -52,6 +52,7 @@ Rambachan, Ashesh and Jonathan Roth. "An Honest Approach to Parallel Trends." 20 Ashesh Rambachan } \examples{ +\dontrun{ # Simple use case; for more detailed examples, # see createEventStudyPlot(betahat = BCdata_EventStudy$betahat, @@ -62,3 +63,4 @@ Ashesh Rambachan timeVec = BCdata_EventStudy$timeVec, referencePeriod = BCdata_EventStudy$referencePeriod) } +} diff --git a/man/createSensitivityPlot.Rd b/man/createSensitivityPlot.Rd index 7ccab1e..4e5b50b 100644 --- a/man/createSensitivityPlot.Rd +++ b/man/createSensitivityPlot.Rd @@ -39,6 +39,7 @@ Returns ggplot object of the sensitivity plot. Ashesh Rambachan } \examples{ +\dontrun{ # Simple use case; for more detailed examples, # see robustResults <- @@ -55,3 +56,4 @@ Returns ggplot object of the sensitivity plot. alpha = 0.05) createSensitivityPlot(robustResults, originalResults) } +} diff --git a/man/createSensitivityPlot_relativeMagnitudes.Rd b/man/createSensitivityPlot_relativeMagnitudes.Rd index 8974fba..f3c2c3c 100644 --- a/man/createSensitivityPlot_relativeMagnitudes.Rd +++ b/man/createSensitivityPlot_relativeMagnitudes.Rd @@ -40,16 +40,16 @@ Returns ggplot object of the sensitivity plot. Ashesh Rambachan } \examples{ +\dontrun{ # Simple use case. For more detailed examples, - # see ; - # for additional precision, increase the number of gridPoints. + # see kwargs <- list(betahat = BCdata_EventStudy$betahat, sigma = BCdata_EventStudy$sigma, numPrePeriods = length(BCdata_EventStudy$prePeriodIndices), numPostPeriods = length(BCdata_EventStudy$postPeriodIndices), alpha = 0.05) - robustResults <- do.call(createSensitivityResults_relativeMagnitudes, - c(kwargs, list(gridPoints=100))) + robustResults <- do.call(createSensitivityResults_relativeMagnitudes, kwargs) originalResults <- do.call(constructOriginalCS, kwargs) createSensitivityPlot_relativeMagnitudes(robustResults, originalResults) } +} diff --git a/man/createSensitivityResults.Rd b/man/createSensitivityResults.Rd index 6d059a2..47a3769 100644 --- a/man/createSensitivityResults.Rd +++ b/man/createSensitivityResults.Rd @@ -75,6 +75,7 @@ Rambachan, Ashesh and Jonathan Roth. "An Honest Approach to Parallel Trends." 
20 Ashesh Rambachan } \examples{ +\dontrun{ # Simple use case; for more detailed examples, # see createSensitivityResults(betahat = BCdata_EventStudy$betahat, @@ -83,3 +84,4 @@ Ashesh Rambachan numPostPeriods = length(BCdata_EventStudy$postPeriodIndices), alpha = 0.05) } +} diff --git a/man/createSensitivityResults_relativeMagnitudes.Rd b/man/createSensitivityResults_relativeMagnitudes.Rd index a3e1a9d..d251697 100644 --- a/man/createSensitivityResults_relativeMagnitudes.Rd +++ b/man/createSensitivityResults_relativeMagnitudes.Rd @@ -32,7 +32,7 @@ createSensitivityResults_relativeMagnitudes(betahat, sigma, Covariance matrix of event study coefficients. } \item{numPrePeriods}{ - Number of pre-periods. If user selects bound = "deviation from linear trends" (Delta^{SDRM} as base choice of Delta), then numPrePeriods must be greater than one. See details for further explanation. + Number of pre-periods. If user selects bound = "deviation from linear trends" (\eqn{\Delta^{SDRM}} as base choice of Delta), then numPrePeriods must be greater than one. See details for further explanation. } \item{numPostPeriods}{ Number of post-periods. @@ -99,14 +99,14 @@ Rambachan, Ashesh and Jonathan Roth. "An Honest Approach to Parallel Trends." 20 Ashesh Rambachan } \examples{ +\dontrun{ # Simple use case. For more detailed examples, - # see ; - # for additional precision, increase the number of gridPoints. + # see kwargs <- list(betahat = BCdata_EventStudy$betahat, sigma = BCdata_EventStudy$sigma, numPrePeriods = length(BCdata_EventStudy$prePeriodIndices), numPostPeriods = length(BCdata_EventStudy$postPeriodIndices), - alpha = 0.05, - gridPoints = 100) + alpha = 0.05) do.call(createSensitivityResults_relativeMagnitudes, kwargs) } +} diff --git a/tests/test_base.R b/tests/test_base.R index a9b68b4..9e1dd87 100644 --- a/tests/test_base.R +++ b/tests/test_base.R @@ -14,163 +14,171 @@ BC_numPostPeriods <- length(BCdata_EventStudy$postPeriodIndices) BC_l_vec <- basisVector(index = 1, size = BC_numPostPeriods) BC_l_vec <- cbind(c(1, 0, 0, 0)) -test_that("HonestDiD base run with no errors", { - BC_DeltaSDNB_RobustResults <- - createSensitivityResults(betahat = BCdata_EventStudy$betahat, - sigma = BCdata_EventStudy$sigma, - numPrePeriods = BC_numPrePeriods, - numPostPeriods = BC_numPostPeriods, - l_vec = BC_l_vec, - method = "FLCI", - Mvec = seq(from=0, to=0.3, by=0.1)) - - BC_DeltaSDNB_RobustResultsConditional <- - createSensitivityResults(betahat = BCdata_EventStudy$betahat, - sigma = BCdata_EventStudy$sigma, - numPrePeriods = BC_numPrePeriods, - numPostPeriods = BC_numPostPeriods, - l_vec = BC_l_vec, - method = "Conditional", - Mvec = seq(from=0, to=0.3, by=0.1)) - - BC_DeltaSDNB_RobustResultsCF <- - createSensitivityResults(betahat = BCdata_EventStudy$betahat, - sigma = BCdata_EventStudy$sigma, - numPrePeriods = BC_numPrePeriods, - numPostPeriods = BC_numPostPeriods, - l_vec = BC_l_vec, - method = "C-F", - Mvec = seq(from=0, to=0.3, by=0.1)) - - BC_DeltaSDNB_RobustResultsCLF <- - createSensitivityResults(betahat = BCdata_EventStudy$betahat, - sigma = BCdata_EventStudy$sigma, - numPrePeriods = BC_numPrePeriods, - numPostPeriods = BC_numPostPeriods, - l_vec = BC_l_vec, - method = "C-LF", - Mvec = seq(from=0, to=0.3, by=0.1)) - - BC_OriginalResults <- - constructOriginalCS(betahat = BCdata_EventStudy$betahat, - sigma = BCdata_EventStudy$sigma, - numPrePeriods = BC_numPrePeriods, - numPostPeriods = BC_numPostPeriods, - l_vec = BC_l_vec) - - BC_DeltaSDNB_SensitivityPlot <- - createSensitivityPlot(robustResults = 
BC_DeltaSDNB_RobustResults, - originalResults = BC_OriginalResults) - - expect_silent(BC_DeltaSDNB_RobustResults) - expect_silent(BC_DeltaSDNB_RobustResultsConditional) - expect_silent(BC_DeltaSDNB_RobustResultsCF) - expect_silent(BC_DeltaSDNB_RobustResultsCLF) - expect_silent(BC_OriginalResults) - expect_silent(BC_DeltaSDNB_SensitivityPlot) -}) - -test_that("HonestDiD options run with no errors", { - LWdata_RawData = haven::read_dta(system.file("extdata", "LWdata_RawData.dta", package = "HonestDiD")) - sum(LWdata_RawData$nobs) - - # Estimate event study using lfe package - EmpFemale.EventStudy = lfe::felm(emp ~ - rtESV13 + rtESV14 + rtESV15 + - rtESV16 + rtESV17 + rtESV18 + - rtESV19 + rtESV110 + rtESV111 + # End Pre-periods - rtESV113 + rtESV114 + rtESV115 + - rtESV116 + rtESV117 + rtESV118 + - rtESV119 + rtESV120 + rtESV121 + - rtESV122 + rtESV123 + rtESV124 + - rtESV125 + rtESV126 + rtESV127 + - rtESV128 + rtESV129 + rtESV130 + - rtESV131 + rtESV132 + rtESV133 + - rtESV134 + rtESV135 + # End post-periods - yearsfcor + yearsflr + aveitc + fscontrol + - asian + black + hispanic + other | - factor(PUS_SURVEY_YEAR)*factor(BIRTHYEAR) + - factor(PUS_SURVEY_YEAR) + factor(BIRTHSTATE) | - 0 | BIRTHSTATE, - data = LWdata_RawData, - weights = LWdata_RawData$nobs) - summary(EmpFemale.EventStudy) - - coefIndex = which(grepl(x = dimnames(EmpFemale.EventStudy$coefficients)[[1]], pattern = "rtESV")) - betahat = EmpFemale.EventStudy$beta[coefIndex, ] - - # Extract estimated variance-covariance matrix of event study coefficients - sigma = EmpFemale.EventStudy$clustervcv[coefIndex, coefIndex] - - # Construct vector of event times and the scalar reference period - timeVec = c(seq(from = -11, to = -3, by = 1), seq(from = -1, to = 21, by = 1)) - referencePeriod <- -2 - postPeriodIndices <- which(timeVec > -2) - prePeriodIndices <- which(timeVec < -2) - LW_numPrePeriods <- length(prePeriodIndices) - LW_numPostPeriods <- length(postPeriodIndices) - LW_l_vec <- basisVector(index = 1, size = LW_numPostPeriods) - - for( method in c("C-F", "C-LF", "Conditional", "FLCI") ) { - for( monotonicityDirection in c("increasing", "decreasing") ) { - for ( biasDirection in c("positive", "negative") ) { - LW_DeltaSDNB_RobustResults <- - createSensitivityResults(betahat = betahat, - sigma = sigma, - numPrePeriods = LW_numPrePeriods, - numPostPeriods = LW_numPostPeriods, - l_vec = LW_l_vec, - method = method, - monotonicityDirection = monotonicityDirection, - biasDirection = biasDirection, - Mvec = seq(from=0, to=0.3, by=0.1)) - print(c(method, monotonicityDirection, biasDirection, LW_DeltaSDNB_RobustResults)) - expect_silent(LW_DeltaSDNB_RobustResults) +if ( Sys.getenv("HONESTDID_RUN_TESTS") == "1" ) { + test_that("HonestDiD base ran with no errors", { + BC_DeltaSDNB_RobustResults <- + createSensitivityResults(betahat = BCdata_EventStudy$betahat, + sigma = BCdata_EventStudy$sigma, + numPrePeriods = BC_numPrePeriods, + numPostPeriods = BC_numPostPeriods, + l_vec = BC_l_vec, + method = "FLCI", + Mvec = seq(from=0, to=0.3, by=0.1)) + + BC_DeltaSDNB_RobustResultsConditional <- + createSensitivityResults(betahat = BCdata_EventStudy$betahat, + sigma = BCdata_EventStudy$sigma, + numPrePeriods = BC_numPrePeriods, + numPostPeriods = BC_numPostPeriods, + l_vec = BC_l_vec, + method = "Conditional", + Mvec = seq(from=0, to=0.3, by=0.1)) + + BC_DeltaSDNB_RobustResultsCF <- + createSensitivityResults(betahat = BCdata_EventStudy$betahat, + sigma = BCdata_EventStudy$sigma, + numPrePeriods = BC_numPrePeriods, + numPostPeriods = 
BC_numPostPeriods, + l_vec = BC_l_vec, + method = "C-F", + Mvec = seq(from=0, to=0.3, by=0.1)) + + BC_DeltaSDNB_RobustResultsCLF <- + createSensitivityResults(betahat = BCdata_EventStudy$betahat, + sigma = BCdata_EventStudy$sigma, + numPrePeriods = BC_numPrePeriods, + numPostPeriods = BC_numPostPeriods, + l_vec = BC_l_vec, + method = "C-LF", + Mvec = seq(from=0, to=0.3, by=0.1)) + + BC_OriginalResults <- + constructOriginalCS(betahat = BCdata_EventStudy$betahat, + sigma = BCdata_EventStudy$sigma, + numPrePeriods = BC_numPrePeriods, + numPostPeriods = BC_numPostPeriods, + l_vec = BC_l_vec) + + BC_DeltaSDNB_SensitivityPlot <- + createSensitivityPlot(robustResults = BC_DeltaSDNB_RobustResults, + originalResults = BC_OriginalResults) + + expect_silent(BC_DeltaSDNB_RobustResults) + expect_silent(BC_DeltaSDNB_RobustResultsConditional) + expect_silent(BC_DeltaSDNB_RobustResultsCF) + expect_silent(BC_DeltaSDNB_RobustResultsCLF) + expect_silent(BC_OriginalResults) + expect_silent(BC_DeltaSDNB_SensitivityPlot) + }) +} else { + print("HonestDiD base run was skipped") +} + +if ( Sys.getenv("HONESTDID_RUN_TESTS") == "1" ) { + test_that("HonestDiD options ran with no errors", { + LWdata_RawData = haven::read_dta(system.file("extdata", "LWdata_RawData.dta", package = "HonestDiD")) + sum(LWdata_RawData$nobs) + + # Estimate event study using lfe package + EmpFemale.EventStudy = lfe::felm(emp ~ + rtESV13 + rtESV14 + rtESV15 + + rtESV16 + rtESV17 + rtESV18 + + rtESV19 + rtESV110 + rtESV111 + # End Pre-periods + rtESV113 + rtESV114 + rtESV115 + + rtESV116 + rtESV117 + rtESV118 + + rtESV119 + rtESV120 + rtESV121 + + rtESV122 + rtESV123 + rtESV124 + + rtESV125 + rtESV126 + rtESV127 + + rtESV128 + rtESV129 + rtESV130 + + rtESV131 + rtESV132 + rtESV133 + + rtESV134 + rtESV135 + # End post-periods + yearsfcor + yearsflr + aveitc + fscontrol + + asian + black + hispanic + other | + factor(PUS_SURVEY_YEAR)*factor(BIRTHYEAR) + + factor(PUS_SURVEY_YEAR) + factor(BIRTHSTATE) | + 0 | BIRTHSTATE, + data = LWdata_RawData, + weights = LWdata_RawData$nobs) + summary(EmpFemale.EventStudy) + + coefIndex = which(grepl(x = dimnames(EmpFemale.EventStudy$coefficients)[[1]], pattern = "rtESV")) + betahat = EmpFemale.EventStudy$beta[coefIndex, ] + + # Extract estimated variance-covariance matrix of event study coefficients + sigma = EmpFemale.EventStudy$clustervcv[coefIndex, coefIndex] + + # Construct vector of event times and the scalar reference period + timeVec = c(seq(from = -11, to = -3, by = 1), seq(from = -1, to = 21, by = 1)) + referencePeriod <- -2 + postPeriodIndices <- which(timeVec > -2) + prePeriodIndices <- which(timeVec < -2) + LW_numPrePeriods <- length(prePeriodIndices) + LW_numPostPeriods <- length(postPeriodIndices) + LW_l_vec <- basisVector(index = 1, size = LW_numPostPeriods) + + for( method in c("C-F", "C-LF", "Conditional", "FLCI") ) { + for( monotonicityDirection in c("increasing", "decreasing") ) { + for ( biasDirection in c("positive", "negative") ) { + LW_DeltaSDNB_RobustResults <- + createSensitivityResults(betahat = betahat, + sigma = sigma, + numPrePeriods = LW_numPrePeriods, + numPostPeriods = LW_numPostPeriods, + l_vec = LW_l_vec, + method = method, + monotonicityDirection = monotonicityDirection, + biasDirection = biasDirection, + Mvec = seq(from=0, to=0.3, by=0.1)) + print(c(method, monotonicityDirection, biasDirection, LW_DeltaSDNB_RobustResults)) + expect_silent(LW_DeltaSDNB_RobustResults) + } } } - } - - for ( method in c(NULL, "C-LF", "Conditional") ) { - for ( monotonicityDirection in 
c("increasing", "decreasing", NULL) ) { - for ( bound in c("deviation from parallel trends", "deviation from linear trend") ) { - BC_DeltaRM_RobustResults <- - createSensitivityResults_relativeMagnitudes(betahat = BCdata_EventStudy$betahat, - sigma = BCdata_EventStudy$sigma, - numPrePeriods = BC_numPrePeriods, - numPostPeriods = BC_numPostPeriods, - l_vec = BC_l_vec, - gridPoints = 100, - grid.ub = 1, - grid.lb = -1, - bound = bound, - method = method, - monotonicityDirection = monotonicityDirection, - Mbarvec = seq(from=0, to=1, by=0.5)) - print(c(method, monotonicityDirection, biasDirection, bound, BC_DeltaRM_RobustResults)) - expect_silent(BC_DeltaRM_RobustResults) + + for ( method in list(NULL, "C-LF", "Conditional") ) { + for ( monotonicityDirection in list("increasing", "decreasing", NULL) ) { + for ( bound in c("deviation from parallel trends", "deviation from linear trend") ) { + BC_DeltaRM_RobustResults <- + createSensitivityResults_relativeMagnitudes(betahat = BCdata_EventStudy$betahat, + sigma = BCdata_EventStudy$sigma, + numPrePeriods = BC_numPrePeriods, + numPostPeriods = BC_numPostPeriods, + l_vec = BC_l_vec, + gridPoints = 100, + grid.ub = 1, + grid.lb = -1, + bound = bound, + method = method, + monotonicityDirection = monotonicityDirection, + Mbarvec = seq(from=0, to=1, by=0.5)) + print(c(method, monotonicityDirection, bound, BC_DeltaRM_RobustResults)) + expect_silent(BC_DeltaRM_RobustResults) + } } } - } - - for ( method in c(NULL, "C-LF", "Conditional") ) { - for ( biasDirection in c("positive", "negative", NULL) ) { - for ( bound in c("deviation from parallel trends", "deviation from linear trend") ) { - BC_DeltaRM_RobustResults <- - createSensitivityResults_relativeMagnitudes(betahat = BCdata_EventStudy$betahat, - sigma = BCdata_EventStudy$sigma, - numPrePeriods = BC_numPrePeriods, - numPostPeriods = BC_numPostPeriods, - l_vec = BC_l_vec, - gridPoints = 100, - grid.ub = 1, - grid.lb = -1, - bound = bound, - method = method, - biasDirection = biasDirection, - Mbarvec = seq(from=0, to=1, by=0.5)) - print(c(method, monotonicityDirection, biasDirection, bound, BC_DeltaRM_RobustResults)) - expect_silent(BC_DeltaRM_RobustResults) + + for ( method in list(NULL, "C-LF", "Conditional") ) { + for ( biasDirection in list("positive", "negative", NULL) ) { + for ( bound in c("deviation from parallel trends", "deviation from linear trend") ) { + BC_DeltaRM_RobustResults <- + createSensitivityResults_relativeMagnitudes(betahat = BCdata_EventStudy$betahat, + sigma = BCdata_EventStudy$sigma, + numPrePeriods = BC_numPrePeriods, + numPostPeriods = BC_numPostPeriods, + l_vec = BC_l_vec, + gridPoints = 100, + grid.ub = 1, + grid.lb = -1, + bound = bound, + method = method, + biasDirection = biasDirection, + Mbarvec = seq(from=0, to=1, by=0.5)) + print(c(method, biasDirection, bound, BC_DeltaRM_RobustResults)) + expect_silent(BC_DeltaRM_RobustResults) + } } } - } -}) + }) +} else { + print("HonestDiD options run was skipped") +} diff --git a/vignettes/HonestDiD_Example.Rmd b/vignettes/HonestDiD_Example.Rmd index 6f9b1de..0e71fde 100644 --- a/vignettes/HonestDiD_Example.Rmd +++ b/vignettes/HonestDiD_Example.Rmd @@ -1,5 +1,5 @@ --- -output: +output: pdf_document: citation_package: natbib latex_engine: pdflatex @@ -20,14 +20,14 @@ vignette: > library('knitr') knitr::opts_knit$set(self.contained = FALSE) knitr::opts_chunk$set(tidy = TRUE, collapse=TRUE, comment = "#>", - tidy.opts=list(blank=FALSE, width.cutoff=60)) + tidy.opts=list(blank=FALSE, width.cutoff=61)) ``` # 
Description The `HonestDiD` package implements the methods developed in @RambachanRoth for performing inference in difference-in-differences and event-study designs that is robust to violations of the parallel trends assumption. See @RambachanRoth for methodological details. -We illustrate how the package can be used by replicating Figure 4 in @RambachanRoth, which applies these methods to @BenzartiCarloni's results on the effects of a decrease in the value-added tax in France on restaurant profits, and replicating Figure 6 in @RambachanRoth, which applies these methods to @LovenheimWillen's results on the effect of public sector bargaining laws on long-run female labor market outcomes. +We illustrate how the package can be used by replicating Figure 4 in @RambachanRoth, which applies these methods to @BenzartiCarloni's results on the effects of a decrease in the value-added tax in France on restaurant profits, and replicating Figure 6 in @RambachanRoth, which applies these methods to @LovenheimWillen's results on the effect of public sector bargaining laws on long-run female labor market outcomes. The estimated event study coefficients and variance-covariance matrix for the baseline estimates on profits in @BenzartiCarloni are included in the package as `BCdata_EventStudy` (see Section 6.1 of @RambachanRoth for details on the underlying event study specification). The estimated event study coefficients and variance-covariance matrix for the baseline female estimates on employment of @LovenheimWillen are included in the package as `LWdata_EventStudy` (see Section 6.2 of @RambachanRoth for details on the underlying event study specification). The underlying raw data for the estimated event study of @LovenheimWillen are also included in the package. @@ -37,7 +37,8 @@ To install the package, use the function `install_github()` from the `remotes` p ```{r, eval = FALSE} install.packages("remotes") # if remotes package not installed -# Turn off warning-error-conversion, because the tiniest warning stops installation +# Turn off warning-error-conversion, because the tiniest +# warning stops installation Sys.setenv("R_REMOTES_NO_ERRORS_FROM_WARNINGS" = "true") # install from github @@ -51,22 +52,22 @@ library('HonestDiD') # Background: choices of $\Delta$ -Following @RambachanRoth, the parameter of interest is $\theta = l'\tau_{post}$, where $\tau_{post}$ is the vector of dynamic causal effects of interest in the post-periods and $l$ is a vector specified by the user. For instance, if the user is interested in the effect in the first period after treatment, then $l$ should be set to the basis vector with a 1 in the 1st position and zeros elsewhere. +Following @RambachanRoth, the parameter of interest is $\theta = l'\tau_{post}$, where $\tau_{post}$ is the vector of dynamic causal effects of interest in the post-periods and $l$ is a vector specified by the user. For instance, if the user is interested in the effect in the first period after treatment, then $l$ should be set to the basis vector with a 1 in the 1st position and zeros elsewhere. -To construct confidence sets for $\theta$ that are robust to violations of the parallel trends assumption, the user must specify the set $\Delta$, which describes the set of possible violations of the parallel trends assumption that the user is willing to consider. 
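+For example, with four post-periods, $\tau_{post} = (\tau_1, \tau_2, \tau_3, \tau_4)'$ and the choice of $l$ can be written down directly. A short sketch using the package's `basisVector` helper (the averaging vector below is our own illustration, not a package default):
+
+```{r, eval = FALSE}
+# theta = tau_1, the effect in the first post-treatment period
+l_first <- basisVector(index = 1, size = 4)  # equivalently, cbind(c(1, 0, 0, 0))
+# theta = the average effect over the four post-treatment periods
+l_avg <- cbind(rep(1/4, 4))
+```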
+To construct confidence sets for $\theta$ that are robust to violations of the parallel trends assumption, the user must specify the set $\Delta$, which describes the set of possible violations of the parallel trends assumption that the user is willing to consider. The `HonestDiD` package currently allows for several choices of $\Delta$. We discuss these possible choices of $\Delta$ below. We refer the reader to Section 2.3 for extensive details on these choices. ## Smoothness Restrictions -A reasonable baseline in many cases is $\Delta = \Delta^{SD}(M)$, which requires that the underlying trend not deviate ``too much'' from linearity. It imposes that the change in the slope of the underlying trend (i.e. the second difference in $\delta$) be no more than $M$ between consecutive periods, where $M$ is a parameter that is specified by the researcher. Formally, this set is defined as -$$ +A reasonable baseline in many cases is $\Delta = \Delta^{SD}(M)$, which requires that the underlying trend not deviate ``too much'' from linearity. It imposes that the change in the slope of the underlying trend (i.e. the second difference in $\delta$) be no more than $M$ between consecutive periods, where $M$ is a parameter that is specified by the researcher. Formally, this set is defined as +$$ \Delta^{SD}(M) = \{\delta : | (\delta_t - \delta_{t-1}) - (\delta_{t-1} - \delta_{t-2}) | \leq M\}. $$ -For the choice $M = 0$, this choice of $\Delta$ limits the violation of parallel trends to be linear, while $M>0$ relaxes the assumption of exact linearity. See Section 2.3 of @RambachanRoth for further discussion. +For the choice $M = 0$, this choice of $\Delta$ limits the violation of parallel trends to be linear, while $M>0$ relaxes the assumption of exact linearity. See Section 2.3 of @RambachanRoth for further discussion. -The user may additionally restrict the sign of the bias in the post-period. This may be reasonable, for instance, in cases where there is a simulataneous policy change which we think affects the outcome of interest in a particular direction. We refer to restrictions that combine $\Delta^{SD}$ with a restriction on the post-period bias by $\Delta = \Delta^{SDB}(M)$. For example, $\Delta^{SDPB}(M)$ additionally imposes that the violation of parallel trends must be positive in the post-periods, $\delta_t \geq 0$ for $t \geq 0$. Likewise, $\Delta^{SDNB}(M)$ additionally imposes that the violation of parallel trends must be negative in the post-periods $\delta_t \leq 0$ for $t \geq 0$. +The user may additionally restrict the sign of the bias in the post-period. This may be reasonable, for instance, in cases where there is a simultaneous policy change which we think affects the outcome of interest in a particular direction. We refer to restrictions that combine $\Delta^{SD}$ with a restriction on the post-period bias by $\Delta = \Delta^{SDB}(M)$. For example, $\Delta^{SDPB}(M)$ additionally imposes that the violation of parallel trends must be positive in the post-periods, $\delta_t \geq 0$ for $t \geq 0$. Likewise, $\Delta^{SDNB}(M)$ additionally imposes that the violation of parallel trends must be negative in the post-periods, $\delta_t \leq 0$ for $t \geq 0$. -The researcher may specify additional shape restrictions that specify that the violation of parallel trends must be monotonically increasing or decreasing. Such restrictions may be reasonable in cases where the researcher is concerned about secular trends that would have continued absent treatment. 
We refer to restrictions that combine $\Delta^{SD}$ with monotonicity restrictions by $\Delta = \Delta^{SDM}(M)$. In the case where the violation of parallel trends must be increasing, denoted by $\Delta^{SDI}(M)$, this additionally restricts $\delta_{t} \geq \delta_{t-1}$ for all $t$. In the case where the violation of parallel trends must be decreasing, denoted by $\Delta^{SDD}(M)$, this additionally restricts $\delta_{t} \leq \delta_{t-1}$ for all $t$. +The researcher may specify additional shape restrictions that require the violation of parallel trends to be monotonically increasing or decreasing. Such restrictions may be reasonable in cases where the researcher is concerned about secular trends that would have continued absent treatment. We refer to restrictions that combine $\Delta^{SD}$ with monotonicity restrictions by $\Delta = \Delta^{SDM}(M)$. In the case where the violation of parallel trends must be increasing, denoted by $\Delta^{SDI}(M)$, this additionally restricts $\delta_{t} \geq \delta_{t-1}$ for all $t$. In the case where the violation of parallel trends must be decreasing, denoted by $\Delta^{SDD}(M)$, this additionally restricts $\delta_{t} \leq \delta_{t-1}$ for all $t$. These "smoothness restriction" based choices of $\Delta$ are implemented in `HonestDiD`. @@ -75,22 +76,22 @@ These "smoothness restriction" based choices of $\Delta$ are implemented in `Hon Alternatively, the researcher may specify $\Delta$ in a manner that bounds the worst-case violation of parallel trends in the post-treatment period based on the observed worst-case violation in the pre-treatment period. @RambachanRoth consider two forms of such $\Delta$. First, $\Delta^{RM}(\bar{M})$ bounds the maximum post-treatment violation of parallel trends (between consecutive periods) by $\bar{M}$ times the maximum pre-treatment violation of parallel trends. This is defined as -$$ +$$ \Delta^{RM}(\bar{M}) = \{ \delta \,:\, \forall t\geq0, \, |\delta_{t+1} - \delta_{t}| \leq \bar{M} \cdot \max_{s< 0} |\delta_{s+1} - \delta_{s}| \} $$ Second $\Delta^{SDRM}(\bar{M})$ bounds the maximum deviation from a linear trend in the post-treatment period by $\bar{M}$ times the observed maximum deviation from a linear trend in the pre-treatment period. This is defined as -$$ +$$ \Delta^{SDRM}(\bar{M}) = \{ \delta \,:\, \forall t\geq0, \, |(\delta_{t+1} - \delta_{t}) - (\delta_{t} - \delta_{t-1}) | \leq \bar{M} \cdot \max_{s<0} |(\delta_{s+1} - \delta_{s}) - (\delta_{s} - \delta_{s-1})| \} $$ -Notice that this choice of $\delta = \Delta^{SDRM}(\bar{M})$ is analogous to the earlier choice $\Delta^{SD}(M)$, but it allows the magnitude of the possible deviations from a linear trend to depend on the observed pre-treatment trends (rather than being imposed a priori by the researcher). Both of these choices $\Delta^{RM}(\bar{M})$ and $\Delta^{SDRM}(\bar{M})$ may also be combined with the sign and shape restrictions discussed above. +Notice that this choice of $\Delta = \Delta^{SDRM}(\bar{M})$ is analogous to the earlier choice $\Delta^{SD}(M)$, but it allows the magnitude of the possible deviations from a linear trend to depend on the observed pre-treatment trends (rather than being imposed a priori by the researcher). Both of these choices $\Delta^{RM}(\bar{M})$ and $\Delta^{SDRM}(\bar{M})$ may also be combined with the sign and shape restrictions discussed above. These "relative magnitude" based choices of $\Delta$ are implemented in `HonestDiD`. 
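+As a concrete check on these definitions, the sketch below (a hypothetical helper for intuition only, not part of the package API) tests numerically whether a candidate vector of violations `delta`, ordered by event time with `t0` marking the position of the period-0 entry, lies in $\Delta^{RM}(\bar{M})$:
+
+```{r, eval = FALSE}
+in_DeltaRM <- function(delta, t0, Mbar) {
+  d    <- diff(delta)         # consecutive differences delta_{s+1} - delta_s
+  pre  <- d[seq_len(t0 - 1)]  # differences with s < 0
+  post <- d[t0:length(d)]     # differences with t >= 0
+  all(abs(post) <= Mbar * max(abs(pre)))
+}
+```
+
+Replacing `diff(delta)` with second differences, `diff(delta, differences = 2)`, gives the analogous check for $\Delta^{SDRM}(\bar{M})$, with the pre/post split shifted accordingly.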
-# Constructing a sensitivity plot +# Constructing a sensitivity plot We next show how to use the package `HonestDiD` to conduct a formal sensitivity analysis. We recommend that the user create a sensitivity plot that shows how the robust confidence sets vary under different assumptions about $\Delta$ (e.g., letting $M$ vary or adding sign/shape restrictions). -The function `createSensitivityResults` provides a wrapper function to conduct sensitivity analysis for the ``smoothness restriction'' based choices of $\Delta$ discussed in the previous section. This function takes as inputs the estimated event study coefficients, the estimated variance-covariance matrix of the estimates along with the user's choice of $\Delta$ and chosen method for constructing robust confidence intervals. It returns the upper and lower bounds of the robust confidence sets for a vector of choices of $M$ as a dataframe. The researcher may specify that $\Delta$ equals $\Delta^{SD}(M)$, $\Delta^{SDB}(M)$ or $\Delta^{SDM}(M)$. In the latter two cases, the user additionally specifies the sign/direction of the bias/monotonicity. +The function `createSensitivityResults` provides a wrapper function to conduct sensitivity analysis for the ``smoothness restriction'' based choices of $\Delta$ discussed in the previous section. This function takes as inputs the estimated event study coefficients, the estimated variance-covariance matrix of the estimates along with the user's choice of $\Delta$ and chosen method for constructing robust confidence intervals. It returns the upper and lower bounds of the robust confidence sets for a vector of choices of $M$ as a dataframe. The researcher may specify that $\Delta$ equals $\Delta^{SD}(M)$, $\Delta^{SDB}(M)$ or $\Delta^{SDM}(M)$. In the latter two cases, the user additionally specifies the sign/direction of the bias/monotonicity. If the user leaves the desired method as `NULL` in `createSensitivityResults`, the function automatically selects the robust confidence interval based upon the recommendations in Section 5.3 of @RambachanRoth. If $\Delta = \Delta^{SD}(M)$, the FLCI is used. If $\Delta = \Delta^{SDB}(M)$ or $\Delta = \Delta^{SDM}(M)$, the conditional FLCI hybrid confidence set is used. As a default, the function sets the parameter of interest to be the first post-period causal effect, $\theta = \tau_1$. The user may directly specify the parameter of interest by setting the input `l_vec`. @@ -100,7 +101,7 @@ If the user leaves the desired method as `NULL` in `createSensitivityResults_rel # Sensitivity Analysis: Incidence of Value-Added Tax Cut -## Preliminaries +## Preliminaries The included data frame `BCdata_EventStudy` contains the estimated event study coefficients and estimated variance-covariance matrix for the baseline estimates for profits based on the event study specification of @BenzartiCarloni. As discussed in Section 6.1 of @RambachanRoth, the authors estimate the effect of a reduction in the value-added tax in France in July 2009 on restaurant profits. 
Comparing restaurants to a control group of other market services firms that were unaffected by the policy change, the authors estimate the event study specification $$ @@ -108,6 +109,11 @@ Y_{it} = \sum_{s\neq2008} \beta_s \times 1[t = s] \times D_{i} + \phi_i + \lamb $$ where $Y_{it}$ is the log of (before-tax) profits for firm $i$ in year $t$; $D_{i}$ is an indicator for whether firm $i$ is a restaurant; $\phi_i$ and $\lambda_t$ are firm and year fixed effects; and standard errors are clustered at the regional level. `BCdata_EventStudy` contains the estimated event study coefficients $\hat \beta_{s}$, the associated variance-covariance matrix of these estimates and some additional information about the event study specification. The next code snippet loads the data. +```{r, echo=FALSE} +data('VignetteResults', package = "HonestDiD") +for (i in names(VignetteResults)) assign(i, VignetteResults[[i]]) +``` + ```{r} data('BCdata_EventStudy', package = "HonestDiD") @@ -122,57 +128,78 @@ We now show how to use the package `HonestDiD` to conduct a formal sensitivity a In the next code snippet, we conduct the sensitivity analysis plotted in the top right panel of Figure 4 in @RambachanRoth, which shows a sensitivity analysis using $\Delta = \Delta^{SDRM}(\bar{M})$ for the effect on profits in 2009, $\theta = \tau_{2009}$. To do so, we set `bound = "deviation from linear trend"` in `createSensitivityResults_relativeMagnitudes`. -```{r, warning = FALSE} -# Create l_vec to define the parameter of interest, the first post-treatment period. +```{r, warning = FALSE, eval = FALSE} +# Create l_vec to define the parameter of interest, the +# first post-treatment period. BC_l_vec = basisVector(index = 1, size = BC_numPostPeriods) -# Construct robust confidence intervals for Delta^{SDRM}(Mbar) for first post-treatment period. -# We specify 100 gridPoints over [-1, 1] for the underlying test inversion to construct the robust confidence set. +# Construct robust confidence intervals for +# Delta^{SDRM}(Mbar) for first post-treatment period. We +# specify 100 gridPoints over [-1, 1] for the underlying +# test inversion to construct the robust confidence set. # Users may wish to leave this at the default values. -BC_DeltaSDRM_RobustResults = createSensitivityResults_relativeMagnitudes(betahat = BCdata_EventStudy$betahat, - sigma = BCdata_EventStudy$sigma, - bound = "deviation from linear trend", - numPrePeriods = BC_numPrePeriods, - numPostPeriods = BC_numPostPeriods, - l_vec = BC_l_vec, - Mbarvec = seq(from = 0, to = 2, by = 0.5), - gridPoints = 100, grid.lb = -1, grid.ub = 1) +BC_DeltaSDRM_RobustResults = createSensitivityResults_relativeMagnitudes( # + betahat = BCdata_EventStudy$betahat, + sigma = BCdata_EventStudy$sigma, + bound = "deviation from linear trend", + numPrePeriods = BC_numPrePeriods, + numPostPeriods = BC_numPostPeriods, + l_vec = BC_l_vec, + Mbarvec = seq(from = 0, to = 2, by = 0.5), + gridPoints = 100, grid.lb = -1, grid.ub = 1) + head(BC_DeltaSDRM_RobustResults) ``` -The function `createSensitivityPlot_relativeMagnitudes` can then be used to construct a sensitivity plot presenting these results. `createSensitivityPlot_relativeMagnitudes` takes two key inputs. The first input is the dataframe that is produced by `createSensitivityResults_relativeMagnitudes`, which contains the robust confidence intervals. The second input is a dataframe that contains the OLS confidence set for the parameter of interest. This dataframe can be constructed using the function `constructOriginalCS`. 
In the next code snippet, we show how these functions can be used to replicate the top right panel of Figure 4 in @RambachanRoth. +```{r, echo=FALSE} +BC_l_vec = basisVector(index = 1, size = BC_numPostPeriods) +head(BC_DeltaSDRM_RobustResults) +``` + +The function `createSensitivityPlot_relativeMagnitudes` can then be used to construct a sensitivity plot presenting these results. `createSensitivityPlot_relativeMagnitudes` takes two key inputs. The first input is the dataframe that is produced by `createSensitivityResults_relativeMagnitudes`, which contains the robust confidence intervals. The second input is a dataframe that contains the OLS confidence set for the parameter of interest. This dataframe can be constructed using the function `constructOriginalCS`. In the next code snippet, we show how these functions can be used to replicate the top right panel of Figure 4 in @RambachanRoth. ```{r, warning = FALSE} -# Construct dataframe with OLS confidence interval for theta. -BC_OriginalResults = constructOriginalCS(betahat = BCdata_EventStudy$betahat, +# Construct dataframe with OLS confidence interval for theta +BC_OriginalResults = constructOriginalCS(betahat = BCdata_EventStudy$betahat, sigma = BCdata_EventStudy$sigma, - numPrePeriods = BC_numPrePeriods, + numPrePeriods = BC_numPrePeriods, numPostPeriods = BC_numPostPeriods, l_vec = BC_l_vec ) # Construct sensitivity plot. -BC_DeltaSDRM_SensitivityPlot = createSensitivityPlot_relativeMagnitudes(robustResults = BC_DeltaSDRM_RobustResults, - originalResults = BC_OriginalResults) +BC_DeltaSDRM_SensitivityPlot = createSensitivityPlot_relativeMagnitudes( # + robustResults = BC_DeltaSDRM_RobustResults, + originalResults = BC_OriginalResults) + BC_DeltaSDRM_SensitivityPlot ``` + This sensitivity analysis finds that the "breakdown value" of $\bar{M}$ is about 1.5, which means that the significant treatment effect found in 2009 is robust to allowing for a non-linearity in the differential trend between restaurants and other service firms in the post-treatment period that is about 1.5 times the maximum observed non-linearity in the pre-treatment period. We also discuss how additional context-specific knowledge may inform the sensitivity analysis. As discussed in Section 6.1 of @RambachanRoth, @BenzartiCarloni indicate that their event-study estimates may be biased since other confounding policy changes occurred at the same time of the value-added tax change. This suggests that we may wish to incorporate an additional sign restriction in the sensitivity analysis, which imposes that the sign of the bias be negative. The next code snippet therefore conducts the sensitivity analysis plotted in the bottom right panel of Figure 4 in @RambachanRoth, which shows a sensitivity analysis using $\Delta = \Delta^{SDNB}(M) := \Delta^{SD}(M) \cap \{\delta \,:\, \delta_{post} \leq 0 \}$. We construct the sensitivity analysis using `createSensitivityResults` and specify that `biasDirection = "negative"`. We plot the results using `createSensitivityPlot`. 
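+Before running it, note that the sign restriction is simple to state directly: a candidate `delta` lies in $\Delta^{SDNB}(M)$ exactly when its second differences are bounded by $M$ and its post-period entries are non-positive. A minimal sketch of this membership condition (again a hypothetical helper, not part of the package API; `t0` marks the period-0 entry of `delta`):
+
+```{r, eval = FALSE}
+in_DeltaSDNB <- function(delta, t0, M) {
+  all(abs(diff(delta, differences = 2)) <= M) &&  # the Delta^{SD}(M) bound
+    all(delta[t0:length(delta)] <= 0)             # negative post-period bias
+}
+```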
-```{r, warning = FALSE} -# Construct robust confidence intervals for Delta^{SDNB}(M) for first post-treatment period -BC_DeltaSDNB_RobustResults = createSensitivityResults(betahat = BCdata_EventStudy$betahat, - sigma = BCdata_EventStudy$sigma, - numPrePeriods = BC_numPrePeriods, - numPostPeriods = BC_numPostPeriods, - l_vec = BC_l_vec, - Mvec = seq(from = 0, to = 0.3, by = 0.1), - biasDirection = "negative") -BC_DeltaSDNB_SensitivityPlot = createSensitivityPlot(robustResults = BC_DeltaSDNB_RobustResults, - originalResults = BC_OriginalResults) +```{r, warning = FALSE, eval = FALSE} +# Construct robust confidence intervals for Delta^{SDNB}(M) +# for first post-treatment period +BC_DeltaSDNB_RobustResults = createSensitivityResults( # + betahat = BCdata_EventStudy$betahat, + sigma = BCdata_EventStudy$sigma, + numPrePeriods = BC_numPrePeriods, + numPostPeriods = BC_numPostPeriods, + l_vec = BC_l_vec, + Mvec = seq(from = 0, to = 0.3, by = 0.1), + biasDirection = "negative") +BC_DeltaSDNB_SensitivityPlot = createSensitivityPlot( # + robustResults = BC_DeltaSDNB_RobustResults, + originalResults = BC_OriginalResults) BC_DeltaSDNB_SensitivityPlot ``` +```{r, echo = FALSE} +createSensitivityPlot(robustResults = BC_DeltaSDNB_RobustResults, + originalResults = BC_OriginalResults) +``` + # Sensitivity Analysis: Effect of Public Sector Bargaining Laws ## Preliminaries @@ -199,71 +226,83 @@ We now show how to use the package `HonestDiD` to conduct a formal sensitivity a In the next code snippet, we conduct the sensitivity analysis plotted in the right panel of Figure 6 in @RambachanRoth, which shows a sensitivity analysis using $\Delta = \Delta^{SD}(M)$ for the effect on female employment after 15 years of exposure to a duty-to-bargain law, $\theta = \tau_{15}$. -```{r warning = FALSE} +```{r warning = FALSE, eval = FALSE} # Create l_vec corresponding with 15 years of exposure -# Reference is -2 years of exposure, so want effect 17 pds later +# Reference is -2 years of exposure, so want effect 17 pds later LW_l_vec = basisVector(15 - (-2), LW_numPostPeriods) # Construct robust confidence intervals for Delta^{SD}(M) for 15 years of exposure -LW_DeltaSD_RobustResults = createSensitivityResults(betahat = LWdata_EventStudy$betahat, +LW_DeltaSD_RobustResults = createSensitivityResults(betahat = LWdata_EventStudy$betahat, sigma = LWdata_EventStudy$sigma, - numPrePeriods = LW_numPrePeriods, + numPrePeriods = LW_numPrePeriods, numPostPeriods = LW_numPostPeriods, - l_vec = LW_l_vec, + l_vec = LW_l_vec, Mvec = seq(from = 0, to = 0.04, by = 0.005)) head(LW_DeltaSD_RobustResults) ``` -The function `createSensitivityPlot` can then be used to construct a sensitivity plot presenting these results. `createSensitivityPlot` takes two key inputs. The first input is the dataframe that is produced by `createSensitivityResults`, which contains the robust confidence intervals. The second input is a dataframe that contains the OLS confidence set for the parameter of interest. This dataframe can be constructed using the function `constructOriginalCS`. In the next code snippet, we show how these functions can be used to replicate the right panel of Figure 6 in @RambachanRoth. +```{r echo = FALSE} +LW_l_vec = basisVector(15 - (-2), LW_numPostPeriods) +head(LW_DeltaSD_RobustResults) +``` + +The function `createSensitivityPlot` can then be used to construct a sensitivity plot presenting these results. `createSensitivityPlot` takes two key inputs. 
The first input is the dataframe that is produced by `createSensitivityResults`, which contains the robust confidence intervals. The second input is a dataframe that contains the OLS confidence set for the parameter of interest. This dataframe can be constructed using the function `constructOriginalCS`. In the next code snippet, we show how these functions can be used to replicate the right panel of Figure 6 in @RambachanRoth. ```{r warning = FALSE} # Construct dataframe with OLS confidence interval for theta -LW_OriginalResults = constructOriginalCS(betahat = LWdata_EventStudy$betahat, +LW_OriginalResults = constructOriginalCS(betahat = LWdata_EventStudy$betahat, sigma = LWdata_EventStudy$sigma, - numPrePeriods = LW_numPrePeriods, + numPrePeriods = LW_numPrePeriods, numPostPeriods = LW_numPostPeriods, l_vec = LW_l_vec ) # Construct sensitivity plot -LW_DeltaSD_SensitivityPlot = createSensitivityPlot(robustResults = LW_DeltaSD_RobustResults, - originalResults = LW_OriginalResults) +LW_DeltaSD_SensitivityPlot = createSensitivityPlot( # + robustResults = LW_DeltaSD_RobustResults, + originalResults = LW_OriginalResults) LW_DeltaSD_SensitivityPlot ``` -In the next code snippet, we conduct an additional sensitivity analysis. The exercise is similar to that shown above, except we now impose that any violations of parallel trends be (weakly) decreasing ($\Delta = \Delta^{SDD}(M)$). This incorporates the intuition from @LovenheimWillen that the pre-trends for women are likely due to secular trends in female labor supply that would have continued absent treatment. +In the next code snippet, we conduct an additional sensitivity analysis. The exercise is similar to that shown above, except we now impose that any violations of parallel trends be (weakly) decreasing ($\Delta = \Delta^{SDD}(M)$). This incorporates the intuition from @LovenheimWillen that the pre-trends for women are likely due to secular trends in female labor supply that would have continued absent treatment. -```{r warning = FALSE} +```{r warning = FALSE, eval = FALSE} # Construct robust confidence intervals for Delta^{SDD}(M) -LW_DeltaSDD_RobustResults = createSensitivityResults(betahat = LWdata_EventStudy$betahat, - sigma = LWdata_EventStudy$sigma, - monotonicityDirection = "decreasing", - numPrePeriods = LW_numPrePeriods, - numPostPeriods = LW_numPostPeriods, - l_vec = LW_l_vec, - Mvec = seq(from = 0, to = 0.04, by = 0.005)) - -# Construct sensitivity plot -LW_DeltaSDD_SensitivityPlot = createSensitivityPlot(robustResults = LW_DeltaSDD_RobustResults, - originalResults = LW_OriginalResults) +LW_DeltaSDD_RobustResults = createSensitivityResults( # + betahat = LWdata_EventStudy$betahat, + sigma = LWdata_EventStudy$sigma, + monotonicityDirection = "decreasing", + numPrePeriods = LW_numPrePeriods, + numPostPeriods = LW_numPostPeriods, + l_vec = LW_l_vec, + Mvec = seq(from = 0, to = 0.04, by = 0.005)) + +# Construct sensitivity plot +LW_DeltaSDD_SensitivityPlot = createSensitivityPlot( # + robustResults = LW_DeltaSDD_RobustResults, + originalResults = LW_OriginalResults) LW_DeltaSDD_SensitivityPlot ``` +```{r echo = FALSE} +createSensitivityPlot(robustResults = LW_DeltaSDD_RobustResults, originalResults = LW_OriginalResults) +``` + ## Benchmarking $M$ The sensitivity plots discussed above show how our conclusions change as we allow for larger degrees of possible non-linearity in the violations of parallel trends, parameterized by $M$. @RambachanRoth discuss multiple ways for benchmarking $M$ in applied settings. 
-One approach for benchmarking $M$ is to use context-specific knowledge about the magnitudes of potential confounds. For instance, in the context of @LovenheimWillen, one concern is differential changes in education quality that would have occurred even absent the passage of DTB laws. Section 6.2 of @RambachanRoth calibrates $M$ using estimates of the effect of teacher quality on adult employment from @CFR2014. In this calibration, a value of $M = 0.01$ corresponds with a change in slope of the diffferential trend corresponding with a change in teacher quality of $0.025$ standard deviations. +One approach for benchmarking $M$ is to use context-specific knowledge about the magnitudes of potential confounds. For instance, in the context of @LovenheimWillen, one concern is differential changes in education quality that would have occurred even absent the passage of DTB laws. Section 6.2 of @RambachanRoth calibrates $M$ using estimates of the effect of teacher quality on adult employment from @CFR2014. In this calibration, a value of $M = 0.01$ corresponds with a change in slope of the differential trend equivalent to a change in teacher quality of $0.025$ standard deviations. In some cases, it may also be useful to benchmark $M$ -- which bounds the change in slope of the differential trend between consecutive periods -- using estimates of the largest change in slope in the pre-period. We provide the functions `DeltaSD_lowerBound_Mpre` and `DeltaSD_upperBound_Mpre`, which create one-sided confidence intervals for the largest change in slope in the pre-period. Values of $M$ below the values computed by `DeltaSD_lowerBound_Mpre` are rejected by the data (at the given significance level), and thus should be viewed with caution. On the other hand, we stress that data from the pre-period cannot, on its own, place an upper bound on the possible degree of non-linearity under the counterfactual in the post-periods. However, in some cases it may be useful to benchmark the assumed maximal degree of non-linearity $M$ in terms of the largest change in slope in the pre-period. These functions can also be used analogously to benchmark $M$ using event-studies for placebo groups. See the R documentation for additional details. Both functions require the user to specify the vector of estimated event study coefficients, the variance covariance matrix, the number of pre-periods and the desired size of the one-sided confidence intervals. They can be used as follows: -```{r} -LW_lowerBound_M = DeltaSD_lowerBound_Mpre(betahat = LWdata_EventStudy$betahat, - sigma = LWdata_EventStudy$sigma, +```{r, eval = FALSE} +LW_lowerBound_M = DeltaSD_lowerBound_Mpre(betahat = LWdata_EventStudy$betahat, + sigma = LWdata_EventStudy$sigma, numPrePeriods = LW_numPrePeriods) -LW_upperBound_M = DeltaSD_upperBound_Mpre(betahat = LWdata_EventStudy$betahat, - sigma = LWdata_EventStudy$sigma, +LW_upperBound_M = DeltaSD_upperBound_Mpre(betahat = LWdata_EventStudy$betahat, + sigma = LWdata_EventStudy$sigma, numPrePeriods = LW_numPrePeriods) ``` @@ -271,27 +310,27 @@ We now provide additional details on the construction of the event-study results from @LovenheimWillen used in the example above. This event study specification can be fully replicated using a dataframe provided in the package `HonestDiD`. Within the subdirectory `inst/extdata`, the package provides the stata dataset `LWdata_RawData.dta`. 
This contains the estimation sample for females that is used to estimate the above event study specification. This dataset is provided in the replication files for @LovenheimWillen and can be found in the subdirectory of the replication files, `Data files/Estimation samples/Estimation_FEMALE.dta`. The following code snippet shows how to reproduce the event study above using the provided data -```{r eval = FALSE} +```{r eval = FALSE} # Load in LWdata_RawData.dta -LWdata_RawData = haven::read_dta(system.file("extdata", "LWdata_RawData.dta", +LWdata_RawData = haven::read_dta(system.file("extdata", "LWdata_RawData.dta", package = "HonestDiD")) # Estimate event study using lfe package EmpFemale.EventStudy = lfe::felm(emp ~ rtESV13 + rtESV14 + rtESV15 + rtESV16 + rtESV17 + rtESV18 + - rtESV19 + rtESV110 + rtESV111 + # End Pre-periods + rtESV19 + rtESV110 + rtESV111 + # End Pre-periods - rtESV113 + rtESV114 + rtESV115 + - rtESV116 + rtESV117 + rtESV118 + - rtESV119 + rtESV120 + rtESV121 + - rtESV122 + rtESV123 + rtESV124 + - rtESV125 + rtESV126 + rtESV127 + - rtESV128 + rtESV129 + rtESV130 + - rtESV131 + rtESV132 + rtESV133 + + rtESV113 + rtESV114 + rtESV115 + + rtESV116 + rtESV117 + rtESV118 + + rtESV119 + rtESV120 + rtESV121 + + rtESV122 + rtESV123 + rtESV124 + + rtESV125 + rtESV126 + rtESV127 + + rtESV128 + rtESV129 + rtESV130 + + rtESV131 + rtESV132 + rtESV133 + rtESV134 + rtESV135 + # End post-periods - yearsfcor + yearsflr + aveitc + fscontrol + - asian + black + hispanic + other | - factor(PUS_SURVEY_YEAR)*factor(BIRTHYEAR) + - factor(PUS_SURVEY_YEAR) + factor(BIRTHSTATE) | + yearsfcor + yearsflr + aveitc + fscontrol + + asian + black + hispanic + other | + factor(PUS_SURVEY_YEAR)*factor(BIRTHYEAR) + + factor(PUS_SURVEY_YEAR) + factor(BIRTHSTATE) | 0 | BIRTHSTATE, data = LWdata_RawData, weights = LWdata_RawData$nobs) @@ -319,12 +358,12 @@ stdErrors = summary(EmpFemale.EventStudy)$coefficients[coefIndex,2] # Create list containing objects produced by the event study LWdata_EventStudy = list( - betahat = betahat, - sigma = sigma, - timeVec = timeVec, - referencePeriod = referencePeriod, - prePeriodIndices = prePeriodIndices, - postPeriodIndices = postPeriodIndices, + betahat = betahat, + sigma = sigma, + timeVec = timeVec, + referencePeriod = referencePeriod, + prePeriodIndices = prePeriodIndices, + postPeriodIndices = postPeriodIndices, stdErrors = stdErrors ) ``` diff --git a/vignettes/HonestDiD_Example.pdf b/vignettes/HonestDiD_Example.pdf index 6fa0503..7d3ec43 100644 Binary files a/vignettes/HonestDiD_Example.pdf and b/vignettes/HonestDiD_Example.pdf differ diff --git a/vignettes/precompute.R b/vignettes/precompute.R new file mode 100644 index 0000000..f3dcbf2 --- /dev/null +++ b/vignettes/precompute.R @@ -0,0 +1,167 @@ +library(HonestDiD) + +# data('VignetteResults', package="HonestDiD") +data('BCdata_EventStudy', package="HonestDiD") + +# Number of pre-periods +BC_numPrePeriods = length(BCdata_EventStudy$prePeriodIndices) +BC_numPostPeriods = length(BCdata_EventStudy$postPeriodIndices) + +# Create l_vec to define the parameter of interest, the first post-treatment period. +BC_l_vec = basisVector(index = 1, size = BC_numPostPeriods) + +# Construct robust confidence intervals for Delta^{SDRM}(Mbar) for first post-treatment period. +# We specify 100 gridPoints over [-1, 1] for the underlying test inversion to construct the robust confidence set. +# Users may wish to leave this at the default values. 
diff --git a/vignettes/HonestDiD_Example.pdf b/vignettes/HonestDiD_Example.pdf
index 6fa0503..7d3ec43 100644
Binary files a/vignettes/HonestDiD_Example.pdf and b/vignettes/HonestDiD_Example.pdf differ
diff --git a/vignettes/precompute.R b/vignettes/precompute.R
new file mode 100644
index 0000000..f3dcbf2
--- /dev/null
+++ b/vignettes/precompute.R
@@ -0,0 +1,167 @@
+library(HonestDiD)
+
+# data('VignetteResults', package="HonestDiD")
+data('BCdata_EventStudy', package="HonestDiD")
+
+# Number of pre-periods
+BC_numPrePeriods = length(BCdata_EventStudy$prePeriodIndices)
+BC_numPostPeriods = length(BCdata_EventStudy$postPeriodIndices)
+
+# Create l_vec to define the parameter of interest, the first post-treatment period.
+BC_l_vec = basisVector(index = 1, size = BC_numPostPeriods)
+
+# Construct robust confidence intervals for Delta^{SDRM}(Mbar) for the first post-treatment period.
+# We specify 100 gridPoints over [-1, 1] for the underlying test inversion used to construct the
+# robust confidence set. Users may wish to leave these at their default values.
+BC_DeltaSDRM_RobustResults =
+  createSensitivityResults_relativeMagnitudes(betahat = BCdata_EventStudy$betahat,
+                                              sigma = BCdata_EventStudy$sigma,
+                                              bound = "deviation from linear trend",
+                                              numPrePeriods = BC_numPrePeriods,
+                                              numPostPeriods = BC_numPostPeriods,
+                                              l_vec = BC_l_vec,
+                                              Mbarvec = seq(from = 0, to = 2, by = 0.5),
+                                              gridPoints = 100, grid.lb = -1, grid.ub = 1)
+
+head(BC_DeltaSDRM_RobustResults)
+
+# Construct dataframe with OLS confidence interval for theta.
+BC_OriginalResults = constructOriginalCS(betahat = BCdata_EventStudy$betahat,
+                                         sigma = BCdata_EventStudy$sigma,
+                                         numPrePeriods = BC_numPrePeriods,
+                                         numPostPeriods = BC_numPostPeriods,
+                                         l_vec = BC_l_vec)
+
+# Construct sensitivity plot.
+BC_DeltaSDRM_SensitivityPlot =
+  createSensitivityPlot_relativeMagnitudes(robustResults = BC_DeltaSDRM_RobustResults,
+                                           originalResults = BC_OriginalResults)
+
+BC_DeltaSDRM_SensitivityPlot
+
+# Construct robust confidence intervals for Delta^{SDNB}(M) for the first post-treatment period
+BC_DeltaSDNB_RobustResults = createSensitivityResults(betahat = BCdata_EventStudy$betahat,
+                                                      sigma = BCdata_EventStudy$sigma,
+                                                      numPrePeriods = BC_numPrePeriods,
+                                                      numPostPeriods = BC_numPostPeriods,
+                                                      l_vec = BC_l_vec,
+                                                      Mvec = seq(from = 0, to = 0.3, by = 0.1),
+                                                      biasDirection = "negative")
+BC_DeltaSDNB_SensitivityPlot = createSensitivityPlot(robustResults = BC_DeltaSDNB_RobustResults,
+                                                     originalResults = BC_OriginalResults)
+BC_DeltaSDNB_SensitivityPlot
+
+data('LWdata_EventStudy', package = "HonestDiD")
+
+# Number of pre-periods
+LW_numPrePeriods = length(LWdata_EventStudy$prePeriodIndices)
+LW_numPostPeriods = length(LWdata_EventStudy$postPeriodIndices)
+
+# Create l_vec corresponding to 15 years of exposure.
+# The reference is -2 years of exposure, so we want the effect 17 periods later.
+LW_l_vec = basisVector(15 - (-2), LW_numPostPeriods)
+
+# Construct robust confidence intervals for Delta^{SD}(M) for 15 years of exposure
+LW_DeltaSD_RobustResults = createSensitivityResults(betahat = LWdata_EventStudy$betahat,
+                                                    sigma = LWdata_EventStudy$sigma,
+                                                    numPrePeriods = LW_numPrePeriods,
+                                                    numPostPeriods = LW_numPostPeriods,
+                                                    l_vec = LW_l_vec,
+                                                    Mvec = seq(from = 0, to = 0.04, by = 0.005))
+head(LW_DeltaSD_RobustResults)
+
+# Construct dataframe with OLS confidence interval for theta
+LW_OriginalResults = constructOriginalCS(betahat = LWdata_EventStudy$betahat,
+                                         sigma = LWdata_EventStudy$sigma,
+                                         numPrePeriods = LW_numPrePeriods,
+                                         numPostPeriods = LW_numPostPeriods,
+                                         l_vec = LW_l_vec)
+
+# Construct sensitivity plot
+LW_DeltaSD_SensitivityPlot = createSensitivityPlot(robustResults = LW_DeltaSD_RobustResults,
+                                                   originalResults = LW_OriginalResults)
+LW_DeltaSD_SensitivityPlot
+
+# Construct robust confidence intervals for Delta^{SDD}(M)
+LW_DeltaSDD_RobustResults = createSensitivityResults(betahat = LWdata_EventStudy$betahat,
+                                                     sigma = LWdata_EventStudy$sigma,
+                                                     monotonicityDirection = "decreasing",
+                                                     numPrePeriods = LW_numPrePeriods,
+                                                     numPostPeriods = LW_numPostPeriods,
+                                                     l_vec = LW_l_vec,
+                                                     Mvec = seq(from = 0, to = 0.04, by = 0.005))
+
+# Construct sensitivity plot
+LW_DeltaSDD_SensitivityPlot = createSensitivityPlot(robustResults = LW_DeltaSDD_RobustResults,
+                                                    originalResults = LW_OriginalResults)
+LW_DeltaSDD_SensitivityPlot
+
+LW_lowerBound_M = DeltaSD_lowerBound_Mpre(betahat = LWdata_EventStudy$betahat,
+                                          sigma = LWdata_EventStudy$sigma,
+                                          numPrePeriods = LW_numPrePeriods)
+LW_upperBound_M = DeltaSD_upperBound_Mpre(betahat = LWdata_EventStudy$betahat,
+                                          sigma = LWdata_EventStudy$sigma,
+                                          numPrePeriods = LW_numPrePeriods)
+
+# Load in LWdata_RawData.dta
+LWdata_RawData = haven::read_dta(system.file("extdata", "LWdata_RawData.dta",
+                                             package = "HonestDiD"))
+
+# Estimate event study using lfe package
+EmpFemale.EventStudy = lfe::felm(emp ~ rtESV13 + rtESV14 + rtESV15 +
+                                   rtESV16 + rtESV17 + rtESV18 +
+                                   rtESV19 + rtESV110 + rtESV111 + # End Pre-periods
+                                   rtESV113 + rtESV114 + rtESV115 +
+                                   rtESV116 + rtESV117 + rtESV118 +
+                                   rtESV119 + rtESV120 + rtESV121 +
+                                   rtESV122 + rtESV123 + rtESV124 +
+                                   rtESV125 + rtESV126 + rtESV127 +
+                                   rtESV128 + rtESV129 + rtESV130 +
+                                   rtESV131 + rtESV132 + rtESV133 +
+                                   rtESV134 + rtESV135 + # End post-periods
+                                   yearsfcor + yearsflr + aveitc + fscontrol +
+                                   asian + black + hispanic + other |
+                                   factor(PUS_SURVEY_YEAR)*factor(BIRTHYEAR) +
+                                   factor(PUS_SURVEY_YEAR) + factor(BIRTHSTATE) |
+                                   0 | BIRTHSTATE,
+                                 data = LWdata_RawData,
+                                 weights = LWdata_RawData$nobs)
+
+# Extract coefficients of regression associated with event study coefficients
+coefIndex = which(grepl(x = dimnames(EmpFemale.EventStudy$coefficients)[[1]],
+                        pattern = "rtESV"))
+betahat = EmpFemale.EventStudy$beta[coefIndex, ]
+
+# Extract estimated variance-covariance matrix of event study coefficients
+sigma = EmpFemale.EventStudy$clustervcv[coefIndex, coefIndex]
+
+# Rescale by 100 so that results will be in units of percentage points
+betahat = 100 * betahat
+sigma = 100^2 * sigma
+
+# Construct vector of event times and the scalar reference period
+timeVec = c(seq(from = -11, to = -3, by = 1), seq(from = -1, to = 21, by = 1))
+referencePeriod = -2
+postPeriodIndices = which(timeVec > -2)
+prePeriodIndices = which(timeVec < -2)
+
+# Construct standard errors associated with event study coefficients
+stdErrors = summary(EmpFemale.EventStudy)$coefficients[coefIndex, 2]
+
+# Create list containing objects produced by the event study
+LWdata_EventStudy = list(
+  betahat = betahat,
+  sigma = sigma,
+  timeVec = timeVec,
+  referencePeriod = referencePeriod,
+  prePeriodIndices = prePeriodIndices,
+  postPeriodIndices = postPeriodIndices,
+  stdErrors = stdErrors
+)
+
+VignetteResults <- list(BC_DeltaSDRM_RobustResults = BC_DeltaSDRM_RobustResults,
+                        BC_OriginalResults = BC_OriginalResults,
+                        BC_DeltaSDNB_RobustResults = BC_DeltaSDNB_RobustResults,
+                        LW_DeltaSD_RobustResults = LW_DeltaSD_RobustResults,
+                        LW_DeltaSDD_RobustResults = LW_DeltaSDD_RobustResults)
+save(VignetteResults, file = "data/VignetteResults.rda")
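Editorial aside: the new script ends at the `save()` call, so the following is a minimal sketch of the presumed downstream workflow -- an assumption mirroring the commented `data('VignetteResults', ...)` call at the top of the file, not something the diff itself shows. It lets the vignette's `eval = FALSE` chunks be paired with precomputed output.

```r
# Illustrative only: after running vignettes/precompute.R from the package
# root, reload the stored results for use in the vignette. The relative path
# matches the save() call above.
load("data/VignetteResults.rda")
names(VignetteResults)
head(VignetteResults$LW_DeltaSD_RobustResults)
```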