Skip to content

Commit

Permalink
Merge pull request #243 from NSAPH-Software/bug_fix_4
Browse files Browse the repository at this point in the history
Bug fix 4
  • Loading branch information
Naeemkh authored Apr 13, 2024
2 parents e3f2f07 + a0e0306 commit efdc59c
Show file tree
Hide file tree
Showing 20 changed files with 6,970 additions and 7 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
.Rproj.user/*
.Rhistory
.RData
*.RData
*.rds
.Ruserdata
inst/doc
tests/testthat/_snaps/*
Expand All @@ -17,4 +19,4 @@ src-i386
functional_tests/supplements/*
*zip_data.RData
*log*
*.rds
*a.rds
2 changes: 0 additions & 2 deletions R/estimate_erf.R
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,6 @@ estimate_erf <- function(.data,
result_data_prediction <- data.frame(w_vals = w_vals,
y_pred = y_pred)

print(erf_np)

} else {
stop("The code should never get here. Double check.")
}
Expand Down
8 changes: 6 additions & 2 deletions R/estimate_gps.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
#'
#' @return
#' The function returns a S3 object. Including the following:
#' - `.data `: `id`, `w`, `gps`, `e_gps_pred`, `e_gps_std_pred`, `w_resid`
#' - `.data `: `id`, `exposure_var`, `gps`, `e_gps_pred`, `e_gps_std_pred`,
#' `w_resid`
#' - `params`: Including the following fields:
#' - gps_mx (min and max of gps)
#' - w_mx (min and max of w).
Expand Down Expand Up @@ -113,7 +114,10 @@ estimate_gps <- function(.data,
gps_mx <- compute_min_max(gps)

# create new data.frame for output
dataset <- data.frame(id = .data$id, w = response_data, gps = gps)
dataset <- stats::setNames(data.frame(.data$id,
response_data,
gps),
c("id", response_var, "gps"))
dataset$e_gps_pred <- e_gps_pred
if (length(e_gps_std_pred) == 1){
e_gps_std_pred <- rep(e_gps_std_pred, nrow(dataset))
Expand Down
2 changes: 1 addition & 1 deletion R/matching_fn.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ matching_fn <- function(w,
std_w <- (w - w_min) / (w_max - w_min)
std_p_w <- (p_w - gps_min) / (gps_max - gps_min)

dataset_subset <- dataset[abs(dataset[["w"]] - w) <= (delta_n / 2), ]
dataset_subset <- dataset[abs(dataset[[exposure_col_name]] - w) <= (delta_n / 2), ]

if (nrow(dataset_subset) < 1){
logger:: log_warn(paste("There is no data to match with ", w, "in ",
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file not shown.
Binary file not shown.
Binary file not shown.
70 changes: 70 additions & 0 deletions functional_tests/paper_1_estimating_gps_normal.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
## author: Naeem Khoshnevis
## created: September 2023
## purpose: Reproducing examples in the paper.


# Load libraries
library(ggplot2)
library(CausalGPS)
library(data.table)

# Load data --------------------------------------------------------------------
data_file <- "zip_data.RData"
if (!file.exists(data_file)) {
stop(paste0("Download the study data file from the following link:\n",
"https://drive.google.com/file/d/",
"1QFdbVU8Qir1gWf96c5h_ZhT-aPjhHpqn/view?usp=share_link"))
} else {
load(data_file)
}

data.table::setDF(zip_data)
data <- zip_data

# Add id to the data
data$id <- 1:nrow(data)
data$w <- data$pm25
data$pm25 <- NULL

save(data, file = "study_data.RData")

# Estimate GPS -----------------------------------------------------------------

## Super learner wrapper
m_xgboost <- function(nthread = 6,
ntrees = 50,
shrinkage = 0.3,
max_depth = 6,
minobspernode = 1,
verbose = 1,
...) {SuperLearner::SL.xgboost(
nthread = nthread,
ntrees = ntrees,
shrinkage=shrinkage,
max_depth=max_depth,
mibobspernode=minobspernode,
verbose=verbose,
...)}

exposure <- "w"
confounders <- c("mean_bmi", "smoke_rate",
"hispanic", "pct_blk", "medhouseholdincome",
"medianhousevalue", "poverty", "popdensity",
"pct_owner_occ", "summer_tmmx", "winter_tmmx",
"summer_rmax", "winter_rmax", "year")

formula_str <- paste(exposure, " ~ ", paste(confounders, collapse = " + "))


data_with_gps_normal <- estimate_gps(.data = data,
.formula = as.formula(formula_str),
gps_density = "normal",
sl_lib = c("m_xgboost")
)


pdf("figure_paper_1_estimating_gps_normal.pdf")
plot(data_with_gps_normal)
dev.off()

save(data_with_gps_normal, file = "data_with_gps_normal.RData")
66 changes: 66 additions & 0 deletions functional_tests/paper_2_estimating_gps_kernel.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
## author: Naeem Khoshnevis
## created: March 2024
## purpose: Reproducing examples in the paper.


# Load libraries
library(ggplot2)
library(CausalGPS)
library(data.table)

# Load data --------------------------------------------------------------------
data_file <- "zip_data.RData"
if (!file.exists(data_file)) {
stop(paste0("Download the study data file from the following link:\n",
"https://drive.google.com/file/d/",
"1QFdbVU8Qir1gWf96c5h_ZhT-aPjhHpqn/view?usp=share_link"))
} else {
load(data_file)
}

data.table::setDF(zip_data)
data <- zip_data

# Add id to the data
data$id <- 1:nrow(data)

# Estimate GPS -----------------------------------------------------------------

## Super learner wrapper
m_xgboost <- function(nthread = 6,
ntrees = 50,
shrinkage = 0.3,
max_depth = 6,
minobspernode = 1,
verbose = 1,
...) {SuperLearner::SL.xgboost(
nthread = nthread,
ntrees = ntrees,
shrinkage=shrinkage,
max_depth=max_depth,
mibobspernode=minobspernode,
verbose=verbose,
...)}

exposure <- "pm25"
confounders <- c("mean_bmi", "smoke_rate",
"hispanic", "pct_blk", "medhouseholdincome",
"medianhousevalue", "poverty", "popdensity",
"pct_owner_occ", "summer_tmmx", "winter_tmmx",
"summer_rmax", "winter_rmax", "year")

formula_str <- paste(exposure, " ~ ", paste(confounders, collapse = " + "))


data_with_gps_kernel <- estimate_gps(.data = data,
.formula = as.formula(formula_str),
gps_density = "kernel",
sl_lib = c("m_xgboost")
)


pdf("figure_paper_2_estimating_gps_kernel.pdf")
plot(data_with_gps_kernel)
dev.off()

save(data_with_gps_kernel, file = "data_with_gps_kernel.RData")
26 changes: 26 additions & 0 deletions functional_tests/paper_3_compute_weight_counter_weighting.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
## author: Naeem Khoshnevis
## created: March 2024
## purpose: Reproducing examples in the paper.


# Load libraries
library(ggplot2)
library(CausalGPS)


# Load gps object
load("data_with_gps_normal.RData")


cw_weighting_object <- compute_counter_weight(gps_obj = data_with_gps_normal,
ci_appr = "weighting",
bin_seq = NULL,
nthread = 6,
delta_n = 0.1,
dist_measure = "l1",
scale = 0.5)


save(cw_weighting_object, file = "cw_weighting_object.RData")


24 changes: 24 additions & 0 deletions functional_tests/paper_4_compute_weight_counter_matching.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
## author: Naeem Khoshnevis
## created: March 2024
## purpose: Reproducing examples in the paper.


# Load libraries
library(ggplot2)
library(CausalGPS)


# Load gps object
load("data_with_gps_normal.RData")


cw_matching_object <- compute_counter_weight(gps_obj = data_with_gps_normal,
ci_appr = "matching",
bin_seq = NULL,
nthread = 6,
delta_n = 0.1,
dist_measure = "l1",
scale = 0.5)


save(cw_matching_object, file = "cw_matching_object.RData")
36 changes: 36 additions & 0 deletions functional_tests/paper_5_compute_pseudo_pop_weighting.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
## author: Naeem Khoshnevis
## created: March 2024
## purpose: Reproducing examples in the paper.


# Load libraries
library(ggplot2)
library(CausalGPS)


# Load cw object and data
load("cw_weighting_object.RData")
load("study_data.RData")

confounders <- c("mean_bmi", "smoke_rate",
"hispanic", "pct_blk", "medhouseholdincome",
"medianhousevalue", "poverty", "popdensity",
"pct_owner_occ", "summer_tmmx", "winter_tmmx",
"summer_rmax", "winter_rmax", "year")


pseudo_pop_weighting_object <- generate_pseudo_pop(
.data = data,
cw_obj = cw_weighting_object,
covariate_col_names = confounders,
covar_bl_trs = 0.1,
covar_bl_trs_type = "maximal",
covar_bl_method = "absolute")


save(pseudo_pop_weighting_object, file = "pseudo_pop_weighting_object.RData")


pdf("figure_paper_5_pseudo_pop_weighting_object.pdf")
plot(pseudo_pop_weighting_object)
dev.off()
35 changes: 35 additions & 0 deletions functional_tests/paper_6_compute_pseudo_pop_matching.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
## author: Naeem Khoshnevis
## created: March 2024
## purpose: Reproducing examples in the paper.


# Load libraries
library(ggplot2)
library(CausalGPS)


# Load cw object and data
load("cw_matching_object.RData")
load("study_data.RData")

confounders <- c("mean_bmi", "smoke_rate",
"hispanic", "pct_blk", "medhouseholdincome",
"medianhousevalue", "poverty", "popdensity",
"pct_owner_occ", "summer_tmmx", "winter_tmmx",
"summer_rmax", "winter_rmax", "year")


pseudo_pop_weighting_object <- generate_pseudo_pop(
.data = data,
cw_obj = cw_matching_object,
covariate_col_names = confounders,
covar_bl_trs = 0.1,
covar_bl_trs_type = "maximal",
covar_bl_method = "absolute")


save(pseudo_pop_matching_object, file = "pseudo_pop_matching_object.RData")

pdf("figure_paper_6_pseudo_pop_matching_object.pdf")
plot(pseudo_pop_matching_object)
dev.off()
Loading

0 comments on commit efdc59c

Please sign in to comment.