Merge pull request #4 from ecmerkle/master

update to latest blavaan
ecmerkle · Jan 29, 2024 · 56bc773 · 56bc773
2 parents 84e6d54 + df6f12f
commit 56bc773
Show file tree

Hide file tree

Showing 37 changed files with 1,312 additions and 858 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: blavaan
 Title: Bayesian Latent Variable Analysis
-Version: 0.4-9.1142
+Version: 0.5-3.1230
 Authors@R: c(person(given = "Edgar", family = "Merkle",
                     role = c("aut", "cre"),
                     email = "[email protected]",
@@ -35,10 +35,12 @@ Description: Fit a variety of Bayesian latent variable models, including confirm
 License: GPL (>= 3)
 ByteCompile: true
 Depends: R(>= 3.5.0), methods, Rcpp(>= 0.12.15)
-Imports: stats, utils, graphics, lavaan(>= 0.6-14), coda, mnormt, nonnest2(>= 0.5-5), loo(>= 2.0), rstan(>= 2.21.2), rstantools(>= 1.5.0), RcppParallel (>= 5.0.1), bayesplot, Matrix, future.apply, tmvnsim
-LinkingTo: StanHeaders (>= 2.18.1), rstan (>= 2.21.2), BH (>= 1.69.0), Rcpp (>= 0.12.15), RcppEigen (>= 0.3.3.4.0), RcppParallel (>= 5.0.1)
+Imports: stats, utils, graphics, lavaan(>= 0.6-17), coda, mnormt, nonnest2(>= 0.5-5), loo(>= 2.0), rstan(>= 2.26.0), rstantools(>= 1.5.0), RcppParallel (>= 5.0.1), bayesplot, Matrix, future.apply, tmvnsim
+LinkingTo: StanHeaders (>= 2.26.0), rstan (>= 2.26.0), BH (>= 1.69.0), Rcpp (>= 0.12.15), RcppEigen (>= 0.3.3.4.0), RcppParallel (>= 5.0.1)
 Suggests: runjags(>= 2.0.4-3), modeest(>= 2.3.3), rjags, cmdstanr, semTools, tinytest
 SystemRequirements: GNU make
 NeedsCompilation: yes
+URL: https://ecmerkle.github.io/blavaan/, https://github.com/ecmerkle/blavaan
+BugReports: https://github.com/ecmerkle/blavaan/issues
 Additional_repositories: https://mc-stan.org/r-packages/
 Config/Needs/website: brms
diff --git a/NAMESPACE b/NAMESPACE
@@ -12,7 +12,7 @@ importFrom("stats",
            "runif", "sd", "quantile", "rWishart", "cov", "cor",
            "coef", "logLik",
            "residuals", "resid",
-           "fitted.values", "fitted",
+           "fitted.values", "fitted", "na.omit",
            "predict",
            "update",
            "anova",

diff --git a/NEWS.md b/NEWS.md
@@ -1,10 +1,54 @@
+
+# Version 0.5-3
+## New features
+* Functionality to find unrestricted blocks of the model's psi matrix (lv covariance matrix). lkj priors are assigned to these unrestricted blocks, which mitigates the positive definiteness issue described in the "Opaque priors" paper.
+
+* Improved functionality for obtaining posterior modes via, e.g., summary(., postmode = TRUE)
+
+* blavCompare() messaging is improved to clarify ELPD differences, and the function returns more output.
+
+* Bug fix in two-level models with within-only observed variables; messaging added for unstable ppp.
+
+* When extracting posterior draws via blavInspect(., "mcmc"), column names now match lavaan parameter names. For old behavior involving Stan parameter names, use argument add.labels = FALSE.
+
+* Bugs from 0.5-2 are fixed.
+
+## Bugs/glitches discovered after the release:
+* Some models with exogenous covariates, fixed.x=TRUE, and missing data fail to converge and yield implausible parameter values (reported by DeAnne Hunter).
+
+
+# Version 0.5-2
+## New features
+* This is a maintenance release, primarily adding the new array declaration syntax in Stan models (syntax that became available in the new version of rstan).
+
+## Bugs/glitches discovered after the release:
+* blavCompare() does not work with models that have meanstructure = FALSE (reported by Pedro Ribeiro).
+
+* for target="jags", posterior modes cannot be obtained via postmode = TRUE (reported by Giada Venaruzzo).
+
+* models with both continuous and ordinal variables fail for cases where all ordinal variables are missing (reported by Sonja Winter).
+
+* certain equality constraints involving named parameters fail for target="stan" (reported by Niels Skovgaard-Olsen)
+
+# Version 0.5-1
+## New features
+* Two-level models are now supported (for complete, continuous data) via the cluster argument.
+
+## Bugs/glitches discovered after the release:
+* For two-level model specification, the levels have to be labeled "within" and "between". This is more restrictive than lavaan specification.
+
+* For target="jags", latent variable extraction via blavInspect(., "lvs") fails (reported by Joseph Saraceno).
+
 # Version 0.4-8
 ## New features
 * This is a maintenance release with bug fixes and some changes in compiler settings
 
 ## Bugs/glitches discovered after the release:
 * For certain models with residual correlations and/or correlated factors, the initial values under target='stan' lead to non-positive definite matrices (reported by Yuanyuan Hu).
 
+* For models where a latent variable is regressed on an observed variable (lv ~ ov), the latent variable samples do not account for the mean of the observed variable (they are centered around 0 and off by a constant).
+
+
 # Version 0.4-7
 ## New features
 * This is primarily an update to address a C++14 vs C++17 compilation issue identified by CRAN

diff --git a/R/blav_compare.R b/R/blav_compare.R
@@ -14,11 +14,15 @@ blavCompare <- function(object1, object2, ...) {
   res <- c(bf, object1@test[[1]]$stat, object2@test[[1]]$stat)
   names(res) <- c("bf", "mll1", "mll2")
 
-  ## FIXME? We already get case_lls in blav_fit_measures and should really
-  ## only do it once. But, if we store it in the blavaan object, the size
-  ## of that object can get much larger.
-  if(targ1 == "stan"){
+  if(targ1 == "stan" && blavInspect(object1, "meanstructure")){
     ll1 <- loo::extract_log_lik(object1@external$mcmcout)
+  } else if(blavInspect(object1, "categorical") && lavopt1$test != "none"){
+    if("llnsamp" %in% names(lavopt1)){
+      cat("blavaan NOTE: These criteria involve likelihood approximations that may be imprecise.\n",
+          "You could try running the model again to see how much the criteria fluctuate.\n",
+          "You can also manually set llnsamp for greater accuracy (but also greater runtime).\n\n")
+    }
+    ll1 <- object1@external$casells
   } else {
     lavopt1$estimator <- "ML"
     ll1 <- case_lls(object1@external$mcmcout, make_mcmc(object1@external$mcmcout),
@@ -29,20 +33,27 @@ blavCompare <- function(object1, object2, ...) {
   cid1 <- rep(1:nchain1, each=niter1)
   ref1 <- relative_eff(exp(ll1), chain_id = cid1)
 
-  if(targ2 == "stan"){
+  if(targ2 == "stan" && blavInspect(object2, "meanstructure")){
     ll2 <- loo::extract_log_lik(object2@external$mcmcout)
+  } else if(blavInspect(object2, "categorical") && lavopt2$test != "none"){
+    if("llnsamp" %in% names(lavopt2)){
+      cat("blavaan NOTE: These criteria involve likelihood approximations that may be imprecise.\n",
+          "You could try running the model again to see how much the criteria fluctuate.\n",
+          "You can also manually set llnsamp for greater accuracy (but also greater runtime).\n\n")
+    }
+    ll2 <- object2@external$casells
   } else {
     lavopt2$estimator <- "ML"
     ll2 <- case_lls(object2@external$mcmcout, make_mcmc(object2@external$mcmcout),
                     object2)
-  }
+  }  
   nchain2 <- blavInspect(object2, "n.chains")  ## chain count of object2, so niter2/cid2 line up with the rows of ll2
   niter2 <- nrow(ll2)/nchain2
   cid2 <- rep(1:nchain2, each=niter2)
   ref2 <- relative_eff(exp(ll2), chain_id = cid2)
 
-  loo1 <- loo(ll1, r_eff=ref1)
-  loo2 <- loo(ll2, r_eff=ref2)
+  loo1 <- loo(ll1, r_eff=ref1, ...)
+  loo2 <- loo(ll2, r_eff=ref2, ...)
   waic1 <- waic(ll1); waic2 <- waic(ll2)
 
   diff_loo <- loo_compare(loo1, loo2)
@@ -52,28 +63,28 @@ blavCompare <- function(object1, object2, ...) {
       paste("object1: ", round( waic1$estimates[3,1], 3) ), "\n",
       paste("object2: ", round( waic2$estimates[3,1], 3) ), "\n" )
 
-  cat("\nWAIC difference & SE: \n", 
+  cat("\n ELPD difference & SE: \n", 
       sprintf("%8.3f", diff_waic[2, 1]), 
       sprintf("%8.3f", diff_waic[2, 2]), "\n")
 
   cat("\nLOO estimates: \n",
       paste("object1: ", round( loo1$estimates[3,1], 3) ), "\n",
       paste("object2: ", round( loo2$estimates[3,1], 3) ), "\n" )
 
-  cat("\nLOO difference & SE: \n", 
+  cat("\n ELPD difference & SE: \n", 
       sprintf("%8.3f", diff_loo[2, 1]), 
       sprintf("%8.3f", diff_loo[2, 2]), "\n\n")
 
   cat("Laplace approximation to the log-Bayes factor\n(experimental; positive values favor object1):",
       sprintf("%8.3f", bf), "\n\n")
 
-  looobj <- list(loo1$estimates, loo2$estimates)
-  waicobj <- list(waic1$estimates, waic2$estimates)
+  looobj <- list(loo1, loo2)
+  waicobj <- list(waic1, waic2)
 
   res <- list(bf = res, loo = looobj,
-              diff_loo = diff_loo[2,1:2],
+              diff_loo = diff_loo,
               waic = waicobj,
-              diff_waic = diff_waic[2,1:2])
+              diff_waic = diff_waic)
 
   invisible(res)
 }

diff --git a/R/blav_model_loglik.R b/R/blav_model_loglik.R
@@ -45,7 +45,7 @@ get_ll_cont <- function(postsamp       = NULL, # one posterior sample
   } else {
     implied <- lav_model_implied(lavmodel, delta = (lavmodel@parameterization == "delta"))
   }
-  
+
   ## check for missing, to see if we can easily get baseline ll for chisq
   mis <- FALSE
   if(any(is.na(unlist(lavdata@X)))){
@@ -344,58 +344,62 @@ get_ll_ord <- function(postsamp       = NULL, # one posterior sample
       ord.idx <- unique(TH.idx)
       ord.idx <- ord.idx[ord.idx %in% obsidx[1:Nobs[mm]]]
       nord <- length(ord.idx)
-      if(length(obsnum) > 0){
-        s12s22i <- cmat[ord.idx, obsnum] %*% chol2inv(chol(cmat[obsnum, obsnum]))
-        cov.ord <- cmat[ord.idx, ord.idx] - s12s22i %*% cmat[obsnum, ord.idx]
-      } else {
-        tmpll <- rep(0, r2 - r1 + 1)
-        cov.ord <- cmat[ord.idx, ord.idx, drop=FALSE]
-        mu.ord <- mnvec[ord.idx]
-      }
 
-      mm.in.group <- 1:lavmodel@nmat[g] + cumsum(c(0,lavmodel@nmat))[g]
-      mms <- lavmodel@GLIST[mm.in.group]
-      tau <- mms$tau
-
-      ## thresholds for all cases
-      lowtau <- hitau <- matrix(NA, NROW(YX[r1:r2,]), length(ord.idx))
-      for(j in seq_len(nord)){
-        tmptau <- c(-Inf, tau[TH.idx == ord.idx[j]], Inf)
-        lowtau[,j] <- tmptau[YX[r1:r2,ord.idx[j]]]
-        hitau[,j] <- tmptau[YX[r1:r2,ord.idx[j]] + 1]
-      }
+      ## only proceed if this missingness pattern has ordinal variables!
+      if(nord > 0){
+        if(length(obsnum) > 0){
+          s12s22i <- cmat[ord.idx, obsnum] %*% chol2inv(chol(cmat[obsnum, obsnum]))
+          cov.ord <- cmat[ord.idx, ord.idx] - s12s22i %*% cmat[obsnum, ord.idx]
+        } else {
+          tmpll <- rep(0, r2 - r1 + 1)
+          cov.ord <- cmat[ord.idx, ord.idx, drop=FALSE]
+          mu.ord <- mnvec[ord.idx]
+        }
 
-      for(i in r1:r2){
-        llidx <- i - r1 + 1
+        mm.in.group <- 1:lavmodel@nmat[g] + cumsum(c(0,lavmodel@nmat))[g]
+        mms <- lavmodel@GLIST[mm.in.group]
+        tau <- mms$tau
 
-        if(conditional){
-          catprob <- pnorm(hitau[llidx,], mean = mnvec[i,ord.idx], sd = sqrt(diag(cmat)[ord.idx])) -
-            pnorm(lowtau[llidx,], mean = mnvec[i,ord.idx], sd = sqrt(diag(cmat)[ord.idx]))
-          lsigi <- sum( dbinom(1, size = 1, prob = catprob, log = TRUE) )
-          tmpll[llidx] <- tmpll[llidx] + lsigi
-        } else {
-          if(length(obsnum) > 0){
-            mu.ord <- mnvec[ord.idx] + s12s22i %*% (YX[i,obsnum] - mnvec[obsnum])
-          }
+        ## thresholds for all cases
+        lowtau <- hitau <- matrix(NA, NROW(YX[r1:r2,]), length(ord.idx))
+        for(j in seq_len(nord)){
+          tmptau <- c(-Inf, tau[TH.idx == ord.idx[j]], Inf)
+          lowtau[,j] <- tmptau[YX[r1:r2,ord.idx[j]]]
+          hitau[,j] <- tmptau[YX[r1:r2,ord.idx[j]] + 1]
+        }
 
-          if("llnsamp" %in% names(lavoptions)){
-            ## run tmvnsim to approximate marginal logl
-            lsigi <- try(tmvnsim::tmvnsim(llnsamp, nord,
-                                          lower = lowtau[llidx,], upper = hitau[llidx,],
-                                          means = mu.ord, sigma = cov.ord), silent = TRUE)
-            if(!inherits(lsigi, 'try-error')) lsigi <- mean(lsigi$wts)
-          } else {
-            lsigi <- try(mnormt::sadmvn(lowtau[llidx,], hitau[llidx,], mean = mu.ord, varcov = cov.ord, abseps = 1e-2))
-          }
+        for(i in r1:r2){
+          llidx <- i - r1 + 1
 
-          if(inherits(lsigi, 'try-error')){
-            tmpll[llidx] <- NA
+          if(conditional){
+            catprob <- pnorm(hitau[llidx,], mean = mnvec[i,ord.idx], sd = sqrt(diag(cmat)[ord.idx])) -
+              pnorm(lowtau[llidx,], mean = mnvec[i,ord.idx], sd = sqrt(diag(cmat)[ord.idx]))
+            lsigi <- sum( dbinom(1, size = 1, prob = catprob, log = TRUE) )
+            tmpll[llidx] <- tmpll[llidx] + lsigi
           } else {
-            tmpll[llidx] <- tmpll[llidx] + log(lsigi)
+            if(length(obsnum) > 0){
+              mu.ord <- mnvec[ord.idx] + s12s22i %*% (YX[i,obsnum] - mnvec[obsnum])
+            }
+
+            if("llnsamp" %in% names(lavoptions)){
+              ## run tmvnsim to approximate marginal logl
+              lsigi <- try(tmvnsim::tmvnsim(llnsamp, nord,
+                                            lower = lowtau[llidx,], upper = hitau[llidx,],
+                                            means = mu.ord, sigma = cov.ord), silent = TRUE)
+              if(!inherits(lsigi, 'try-error')) lsigi <- mean(lsigi$wts)
+            } else {
+              lsigi <- try(mnormt::sadmvn(lowtau[llidx,], hitau[llidx,], mean = mu.ord, varcov = cov.ord, abseps = 1e-2))
+            }
+
+            if(inherits(lsigi, 'try-error')){
+              tmpll[llidx] <- NA
+            } else {
+              tmpll[llidx] <- tmpll[llidx] + log(lsigi)
+            }
           }
         }
       }
-            
+
       if(casewise){
         ll.samp[r1:r2] <- tmpll
       } else {
@@ -538,7 +542,8 @@ case_lls <- function(lavjags        = NULL,
                      lavmcmc        = NULL,
                      lavobject      = NULL,
                      conditional    = FALSE,
-                     thin           = 1){
+                     thin           = 1,
+                     debug          = FALSE){
 
   lavdata <- lavobject@Data
 
@@ -556,7 +561,13 @@ case_lls <- function(lavjags        = NULL,
       get_ll(lavmcmc[[j]][itnums[i],], lavobject,
              casewise = TRUE, conditional = conditional)},
       j = j, future.seed = TRUE)
-    tmpres[[j]] <- t(do.call("future_sapply", loop.args))
+
+    loopcom <- "future_sapply"
+    if(debug) {
+      loop.args$future.seed <- NULL
+      loopcom <- "sapply"
+    }
+    tmpres[[j]] <- t(do.call(loopcom, loop.args))
   }
 
   llmat <- do.call("rbind", tmpres)
@@ -587,7 +598,7 @@ llx_2l <- function(Lp, YX, mean_d, cidx){
   } else {
     loglik.x.b <- rep(0, nrow(mean_d))
   }
-  loglik.x <- loglik.x.w.clus + loglik.x.b
+  loglik.x <- loglik.x.w.clus + as.numeric(loglik.x.b)
 
   array(loglik.x, length(loglik.x))
 }