Skip to content

Commit

Permalink
Merge pull request #62 from NSAPH-Software/release_0.2.1
Browse files Browse the repository at this point in the history
Release 0.2.1
  • Loading branch information
Naeemkh authored Mar 15, 2023
2 parents 3cf62a5 + 4ea1be3 commit 737a88a
Show file tree
Hide file tree
Showing 155 changed files with 2,915 additions and 1,943 deletions.
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: GPCERF
Title: Gaussian Processes for Estimating Causal Exposure Response Curves
Version: 0.2.0
Version: 0.2.1
Authors@R: c(
person("Naeem", "Khoshnevis", email = "[email protected]",
role=c("aut","cre"),
Expand Down Expand Up @@ -31,9 +31,11 @@ Imports:
Rcpp,
RcppArmadillo,
ggplot2,
cowplot,
rlang,
Rfast,
SuperLearner
SuperLearner,
wCorr
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.1
Expand Down
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ export(compute_rl_deriv_nn)
export(compute_w_corr)
export(estimate_cerf_gp)
export(estimate_cerf_nngp)
export(estimate_gps)
export(generate_synthetic_data)
export(get_logger)
export(set_logger)
export(train_gps)
import(MASS)
import(Rcpp)
import(RcppArmadillo)
Expand Down
12 changes: 12 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# GPCERF 0.2.1 (2023-03-15)

## Changed
- `full GP` --> `standard GP`
- `plot`s of exposure response function objects include covariate balance.
- `formula` is no longer needed in nn functions.
- `estimate_gps` now returns the used exposure level, too.
- `train_gps` --> `estimate_gps`
- The nearest neighbor approach no longer takes `expand` as an input parameter (`n_neighbor` * `expand` --> `n_neighbor`).
- The weighted covariate balance is now computed using the wCorr package.


# GPCERF 0.2.0 (2023-01-22)

## Changed
Expand Down
28 changes: 0 additions & 28 deletions R/calc_ac.R

This file was deleted.

106 changes: 58 additions & 48 deletions R/compute_deriv_nn.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,24 @@
#'
#' @param w A scalar of exposure level of interest.
#' @param w_obs A vector of observed exposure levels of all samples.
#' @param GPS_m A data.frame of GPS vectors. Including:
#' - Column 1: GPS values.
#' - Column 2: Prediction of exposure for covariate of each data sample (e_gps_pred).
#' - Column 3: Standard deviation of e_gps (e_gps_std).
#' @param gps_m An S3 gps object including:
#' gps: A data.frame of GPS vectors.
#' - Column 1: GPS
#' - Column 2: Prediction of exposure for covariate of each data sample
#' (e_gps_pred).
#' - Column 3: Standard deviation of e_gps (e_gps_std)
#' used_params:
#' - dnorm_log: TRUE or FALSE
#' @param y_obs A vector of observed outcome values.
#' @param hyperparam A vector of hyper-parameters in the GP model.
#' @param n_neighbor The number of nearest neighbors on one side (see also \code{expand}).
#' @param expand Scaling factor to determine the total number of nearest
#' neighbors. The total is \code{2 * expand * n_neighbor}.
#' @param block_size The number of samples included in a computation block. Mainly used to
#' balance the speed and memory requirement. Larger \code{block_size} is faster, but requires more memory.
#' @param kernel_fn The covariance function. The input is the square of Euclidean distance.
#' @param kernel_deriv_fn The partial derivative of the covariance function. The input is the square of Euclidean distance.
#' @param n_neighbor The number of nearest neighbors on one side.
#' @param block_size The number of samples included in a computation block.
#' Mainly used to balance the speed and memory requirement. Larger
#' \code{block_size} is faster, but requires more memory.
#' @param kernel_fn The covariance function. The input is the square of
#' Euclidean distance.
#' @param kernel_deriv_fn The partial derivative of the covariance function.
#' The input is the square of Euclidean distance.
#'
#' @return
#' A scalar of estimated derivative of CERF at \code{w} in nnGP.
Expand All @@ -28,14 +33,13 @@
#'
compute_deriv_nn <- function(w,
w_obs,
GPS_m,
gps_m,
y_obs,
hyperparam,
n_neighbor,
expand,
block_size,
kernel_fn = function(x) exp(-x),
kernel_deriv_fn = function(x) -exp(-x)){
kernel_deriv_fn = function(x) -exp(-x)) {


# Get hyperparameters
Expand All @@ -45,54 +49,60 @@ compute_deriv_nn <- function(w,


# Get gps and helper functions
GPS <- GPS_m$GPS
e_gps_pred <- GPS_m$e_gps_pred
e_gps_std <- GPS_m$e_gps_std
gps <- gps_m$gps$gps
e_gps_pred <- gps_m$gps$e_gps_pred
e_gps_std <- gps_m$gps$e_gps_std
dnorm_log <- gps_m$used_params$dnorm_log

gps_w <- dnorm(w, mean = e_gps_pred, sd = e_gps_std, log = dnorm_log)

GPS_w <- dnorm(w, mean = e_gps_pred, sd = e_gps_std, log = TRUE)
n <- length(gps_w)
n_block <- ceiling(n / block_size)
obs_raw <- cbind(w_obs, gps)
obs_ord <- obs_raw[order(obs_raw[, 1]), ]
y_obs_ord <- y_obs[order(obs_raw[, 1])]

n <- length(GPS_w)
n_block <- ceiling(n/block_size)
obs_raw <- cbind(w_obs, GPS)
obs_ord <- obs_raw[order(obs_raw[,1]),]
y_obs_ord <- y_obs[order(obs_raw[,1])]


if(w >= obs_ord[nrow(obs_ord),1]){
idx_all <- seq( nrow(obs_ord) - expand*n_neighbor + 1, nrow(obs_ord), 1)
}else{
idx_anchor <- which.max(obs_ord[,1]>=w)
idx_start <- max(1, idx_anchor - n_neighbor*expand)
idx_end <- min(nrow(obs_ord), idx_anchor + n_neighbor*expand)
if(idx_end == nrow(obs_ord)){
idx_all <- seq(idx_end - n_neighbor*2*expand + 1, idx_end, 1)
}else{
idx_all <- seq(idx_start, idx_start+n_neighbor*2*expand-1, 1)
if (w >= obs_ord[nrow(obs_ord), 1]) {
idx_all <- seq(nrow(obs_ord) - n_neighbor + 1, nrow(obs_ord), 1)
} else {
idx_anchor <- which.max(obs_ord[, 1] >= w)
idx_start <- max(1, idx_anchor - n_neighbor)
idx_end <- min(nrow(obs_ord), idx_anchor + n_neighbor)
if (idx_end == nrow(obs_ord)) {
idx_all <- seq(idx_end - n_neighbor * 2 + 1, idx_end, 1)
} else {
idx_all <- seq(idx_start, idx_start + n_neighbor * 2 - 1, 1)
}
}

obs_use <- t(t(obs_ord[idx_all,])*(1/sqrt(c(alpha, beta))))
obs_use <- t(t(obs_ord[idx_all, ]) * (1 / sqrt(c(beta, alpha))))
y_use <- y_obs_ord[idx_all]

obs_new <- t(t(cbind(w, GPS_w))*(1/sqrt(c(alpha, beta))))
id_all <- split(1:n, ceiling(seq_along(1:n)/n_block))
Sigma_obs <- g_sigma*kernel_fn(as.matrix(dist(obs_use))^2) + diag(nrow(obs_use))
Sigma_obs_inv <- chol2inv(chol(Sigma_obs))
obs_new <- t(t(cbind(w, gps_w)) * (1 / sqrt(c(beta, alpha))))
id_all <- split(1:n, ceiling(seq_along(1:n) / n_block))
# sigma refers to capital Sigma in the paper.
sigma_obs <- g_sigma * kernel_fn(as.matrix(dist(obs_use)) ^ 2) +
diag(nrow(obs_use))
sigma_obs_inv <- chol2inv(chol(sigma_obs))

all_weights <- sapply(id_all, function(id_ind) {

all_weights <- sapply(id_all, function(id.ind){
# TODO: change index to column name.
cross_dist <- spatstat.geom::crossdist(obs_new[id_ind, 1],
obs_new[id_ind, 2],
obs_use[, 1], obs_use[, 2])

cross_dist <- spatstat.geom::crossdist(obs_new[id.ind,1], obs_new[id.ind,2],
obs_use[,1], obs_use[,2])

Sigma_cross <- g_sigma*(1/alpha)*(2*outer(rep(w,length(id.ind))*(1/alpha),
obs_use[,1], "-"))*
kernel_deriv_fn(cross_dist^2)
sigma_cross <- g_sigma * (1 / beta) *
(2 * outer(rep(w, length(id_ind)) * (1 / beta),
obs_use[, 1], "-")) *
kernel_deriv_fn(cross_dist ^ 2)
#mean
wght <- Sigma_cross%*%Sigma_obs_inv
wght <- sigma_cross %*% sigma_obs_inv
colSums(wght)
})
weights <- rowSums(all_weights)/n
weights <- rowSums(all_weights) / n

return(weights%*%y_use)
return(weights %*% y_use)
}
48 changes: 28 additions & 20 deletions R/compute_deriv_weights_gp.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@
#'
#' @param w A scalar of exposure level of interest.
#' @param w_obs A vector of observed exposure levels of all samples.
#' @param GPS_m A data.frame of GPS vectors. Including:
#' - Column 1: GPS values.
#' - Column 2: Prediction of exposure for covariate of each data
#' sample (e_gps_pred).
#' - Column 3: Standard deviation of e_gps (e_gps_std)
#' @param gps_m An S3 gps object including:
#' gps: A data.frame of GPS vectors.
#' - Column 1: GPS
#' - Column 2: Prediction of exposure for covariate of each data sample
#' (e_gps_pred).
#' - Column 3: Standard deviation of e_gps (e_gps_std)
#' used_params:
#' - dnorm_log: TRUE or FALSE
#' @param hyperparam A vector of hyper-parameters in the GP model.
#' @param kernel_fn The covariance function.
#' @param kernel_deriv_fn The partial derivative of the covariance function.
Expand All @@ -24,36 +27,41 @@
#'
compute_deriv_weights_gp <- function(w,
w_obs,
GPS_m,
gps_m,
hyperparam,
kernel_fn = function(x) exp(-x),
kernel_deriv_fn = function(x) -exp(-x)){
kernel_deriv_fn = function(x) -exp(-x)) {


alpha <- hyperparam[[1]]
beta <- hyperparam[[2]]
g_sigma <- hyperparam[[3]]


GPS <- GPS_m$GPS
e_gps_pred <- GPS_m$e_gps_pred
e_gps_std <- GPS_m$e_gps_std
gps <- gps_m$gps$gps
e_gps_pred <- gps_m$gps$e_gps_pred
e_gps_std <- gps_m$gps$e_gps_std
dnorm_log <- gps_m$used_params$dnorm_log

gps_w <- dnorm(w, mean = e_gps_pred, sd = e_gps_std, log = dnorm_log)
n <- length(gps_w)

GPS_w <- dnorm(w, mean = e_gps_pred, sd = e_gps_std, log = TRUE)
n <- length(GPS_w)
obs_use <- cbind(w_obs * sqrt(1 / beta), gps * sqrt(1 / alpha))
colnames(obs_use) <- c("w_sc_obs", "gps_sc_obs")

obs_use <- cbind(w_obs * sqrt(1 / alpha), GPS * sqrt(1 / beta))
obs_new <- cbind(w * sqrt(1 / alpha), GPS_w * sqrt(1 / beta))
Sigma_obs <- g_sigma * kernel_fn(as.matrix(dist(obs_use)) ^ 2) +
obs_new <- cbind(w * sqrt(1 / beta), gps_w * sqrt(1 / alpha))
colnames(obs_new) <- c("w_sc_for_w", "gps_sc_for_w")

sigma_obs <- g_sigma * kernel_fn(as.matrix(dist(obs_use)) ^ 2) +
diag(nrow(obs_use))
cross_dist <- spatstat.geom::crossdist(obs_new[, 1], obs_new[, 2],
obs_use[, 1], obs_use[, 2])
cross_dist <- spatstat.geom::crossdist(obs_new[, "w_sc_for_w"],
obs_new[, "gps_sc_for_w"],
obs_use[, "w_sc_obs"],
obs_use[, "gps_sc_obs"])

#TODO: Needs refactoring. `outer` function uses significant amount of memory.
Sigma_cross <- g_sigma * sqrt(1 / alpha) * kernel_deriv_fn(cross_dist ^ 2) *
sigma_cross <- g_sigma * sqrt(1 / beta) * kernel_deriv_fn(cross_dist ^ 2) *
(2 * outer(rep(w, n), w_obs, "-"))
weights_all <- Sigma_cross %*% chol2inv(chol(Sigma_obs))
weights_all <- sigma_cross %*% chol2inv(chol(sigma_obs))

return(colMeans(weights_all))
}
8 changes: 6 additions & 2 deletions R/compute_inverse.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,17 @@ compute_inverse <- function(mtrx) {
"Current format: ", class(mtrx)[1]))
}

if (nrow(mtrx) != ncol(mtrx)){
if (nrow(mtrx) != ncol(mtrx)) {
stop(paste0("The input mtrx should be a square matrix. ",
"Current dimension: nrow: ",
nrow(mtrx), ", ncol: ", ncol(mtrx)))
}

t_1 <- proc.time()
inv_mtrx <- chol2inv(chol(mtrx))

t_2 <- proc.time()
logger::log_debug("Wall clock time to compute inverse matrix ",
"({nrow(mtrx)}, {ncol(mtrx)}): ",
" {t_2[[3]] - t_1[[3]]} s.")
return(inv_mtrx)
}
Loading

0 comments on commit 737a88a

Please sign in to comment.