allow for newdata argument (continuous data only), addressing #40

ecmerkle · Mar 14, 2024 · ac636c1 · ac636c1
1 parent eb7683f
commit ac636c1
Show file tree

Hide file tree

Showing 4 changed files with 53 additions and 7 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: blavaan
 Title: Bayesian Latent Variable Analysis
-Version: 0.5-3.1245
+Version: 0.5-3.1246
 Authors@R: c(person(given = "Edgar", family = "Merkle",
                     role = c("aut", "cre"),
                     email = "[email protected]",

diff --git a/NAMESPACE b/NAMESPACE
@@ -36,7 +36,7 @@ importFrom("lavaan",
            "lav_model_set_parameters", "lav_model_vcov_se",
            "lav_partable_attributes",
            "modificationIndices", "parTable", "parameterEstimates",
-           "lavPredict", "standardizedSolution")
+           "lavPredict", "standardizedSolution", "lav_data_update")
 
 importFrom("coda",
            "mcmc.list",

diff --git a/R/blav_predict.R b/R/blav_predict.R
@@ -36,7 +36,12 @@ blavPredict <- function(object, newdata = NULL, type = "lv", level = 1L) {
     if(type %in% c("yhat", "ypred", "ymis")) stop("blavaan ERROR: option", type, "is not yet implemented for two-level models.", call. = FALSE)
   }
 
-  if(!is.null(newdata)) stop("blavaan ERROR: posterior predictions for newdata are not currently supported")
+  if(!is.null(newdata)) {
+    if(!stantarget) stop("blavaan ERROR: newdata is currently only available for target='stan'")
+    if(lavInspect(object, "categorical")) stop("blavaan ERROR: newdata is not yet available for ordinal data.")
+    object <- blav_fill_newdata(object, newdata)
+  }
+
 
   ## lv: posterior dist of lvs (use blavInspect functionality); matrix frame
   ## lvmeans: use blavInspect functionality; matrix
@@ -120,3 +125,44 @@ blavPredict <- function(object, newdata = NULL, type = "lv", level = 1L) {
 
   out
 }
+
+## Fill a fitted blavaan object with newdata, then sample lvs given the already-sampled parameters.
+## object: fitted blavaan object (blavPredict() only calls this for target='stan', non-ordinal fits).
+## newdat: data handed to lavaan's lavData(). Returns object with @Data, stansumm, stanlvs replaced.
+blav_fill_newdata <- function(object, newdat) {
+  lavd <- getFromNamespace("lavData", "lavaan")
+  olddata <- object@Data
+  OV <- olddata@ov
+  object@Data <- lavd(data = newdat,
+                      group = olddata@group,
+                      ov.names = olddata@ov.names,
+                      ov.names.x = olddata@ov.names.x,
+                      ordered = OV$names[ OV$type == "ordered" ],
+                      lavoptions = object@Options, allow.single.case = TRUE)
+
+  ## Stan-formatted newdata: level 1, then level 2; for duplicated data names the first entry is kept
+  l2s <- lav2stanmarg(object, dp = blavInspect(object, 'options')$dp,
+                      n.chains = blavInspect(object, 'nchains'), inits = "simple")
+  l2slev2 <- lav2stanmarg(object, dp = blavInspect(object, 'options')$dp,
+                          n.chains = blavInspect(object, 'nchains'),
+                          inits = "simple", level = 2, indat = l2s$dat)
+  l2s$dat <- c(l2s$dat, l2slev2$dat)
+  l2s$dat <- l2s$dat[!duplicated(names(l2s$dat))]
+  l2s$free2 <- c(l2s$free2, l2slev2$free2)
+  l2s$lavpartable <- rbind(l2s$lavpartable, l2slev2$lavpartable)
+  l2s$wigpris <- c(l2s$wigpris, l2slev2$wigpris)
+  l2s$init <- lapply(seq_along(l2s$init), function(i) c(l2s$init[[i]], l2slev2$init[[i]]))
+  ldargs <- c(l2s$dat, list(lavpartable = l2s$lavpartable, dumlv = l2s$dumlv, dumlv_c = l2slev2$dumlv,
+                            save_lvs = TRUE, do_test = FALSE))
+  smd <- do.call("stanmarg_data", ldargs)
+  ## swap old "eta" summary rows for the new draws; drop = FALSE keeps row names if one row remains
+  newlvs <- samp_lvs(object@external$mcmcout, object@Model, object@ParTable, smd, eeta = NULL, categorical = FALSE)
+  lvsumm <- as.matrix(rstan::monitor(newlvs, print = FALSE))
+  cmatch <- match(colnames(object@external$stansumm), colnames(lvsumm))
+  stansumm <- object@external$stansumm
+  lvrows <- grep("^eta", rownames(stansumm))
+  if (length(lvrows) > 0) stansumm <- stansumm[-lvrows, , drop = FALSE]
+  object@external$stansumm <- rbind(stansumm, lvsumm[, cmatch, drop = FALSE])
+  object@external$stanlvs <- newlvs
+  object
+}
diff --git a/man/blavPredict.Rd b/man/blavPredict.Rd
@@ -11,8 +11,8 @@ blavPredict(object, newdata = NULL, type = "lv", level = 1L)
 }
 \arguments{
 \item{object}{An object of class \code{\linkS4class{blavaan}}.}
-\item{newdata}{Currently unused. (An optional data.frame, containing the same variables as
-the data.frame used when fitting the model in object.)}
+\item{newdata}{An optional data.frame, containing the same variables as
+the data.frame used when fitting the model in object. Currently only supported for \code{target="stan"} and models without ordinal data.}
 \item{type}{A character string. If \code{"lv"}, estimated values for the latent
 variables in the model are computed. If \code{"ov"} or \code{"yhat"}, predicted means for
 the observed variables in the model are computed. If
@@ -43,7 +43,7 @@ are observations and columns are observed variables.
 
 \code{type="ypred"}: The posterior predictive distribution of observed
 variables conditioned on the sampled latent variables (including
-residual variances). Returns a list with "number of samples" entries,
+residual variability). Returns a list with "number of samples" entries,
 where each entry is a data frame where rows are observations and columns
 are observed variables.
 
@@ -54,7 +54,7 @@ values conditioned on observed variables. Returns a matrix with
 }
 \seealso{
 Users may also wish to generate the posterior predictive distribution of
-observed data, not conditioned on the latent variables; this
+observed data, not conditioned on the latent variables. This
 would often be viewed as data from new clusters (people) that were not
 observed in the original dataset. For that, see \code{sampleData()}.
 }