Skip to content

Commit

Permalink
Merge pull request #4 from ecmerkle/master
Browse files Browse the repository at this point in the history
update to latest blavaan
  • Loading branch information
maugavilla authored Jan 29, 2024
2 parents 84e6d54 + df6f12f commit 56bc773
Show file tree
Hide file tree
Showing 37 changed files with 1,312 additions and 858 deletions.
8 changes: 5 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: blavaan
Title: Bayesian Latent Variable Analysis
Version: 0.4-9.1142
Version: 0.5-3.1230
Authors@R: c(person(given = "Edgar", family = "Merkle",
role = c("aut", "cre"),
email = "[email protected]",
Expand Down Expand Up @@ -35,10 +35,12 @@ Description: Fit a variety of Bayesian latent variable models, including confirm
License: GPL (>= 3)
ByteCompile: true
Depends: R(>= 3.5.0), methods, Rcpp(>= 0.12.15)
Imports: stats, utils, graphics, lavaan(>= 0.6-14), coda, mnormt, nonnest2(>= 0.5-5), loo(>= 2.0), rstan(>= 2.21.2), rstantools(>= 1.5.0), RcppParallel (>= 5.0.1), bayesplot, Matrix, future.apply, tmvnsim
LinkingTo: StanHeaders (>= 2.18.1), rstan (>= 2.21.2), BH (>= 1.69.0), Rcpp (>= 0.12.15), RcppEigen (>= 0.3.3.4.0), RcppParallel (>= 5.0.1)
Imports: stats, utils, graphics, lavaan(>= 0.6-17), coda, mnormt, nonnest2(>= 0.5-5), loo(>= 2.0), rstan(>= 2.26.0), rstantools(>= 1.5.0), RcppParallel (>= 5.0.1), bayesplot, Matrix, future.apply, tmvnsim
LinkingTo: StanHeaders (>= 2.26.0), rstan (>= 2.26.0), BH (>= 1.69.0), Rcpp (>= 0.12.15), RcppEigen (>= 0.3.3.4.0), RcppParallel (>= 5.0.1)
Suggests: runjags(>= 2.0.4-3), modeest(>= 2.3.3), rjags, cmdstanr, semTools, tinytest
SystemRequirements: GNU make
NeedsCompilation: yes
URL: https://ecmerkle.github.io/blavaan/, https://github.com/ecmerkle/blavaan
BugReports: https://github.com/ecmerkle/blavaan/issues
Additional_repositories: https://mc-stan.org/r-packages/
Config/Needs/website: brms
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ importFrom("stats",
"runif", "sd", "quantile", "rWishart", "cov", "cor",
"coef", "logLik",
"residuals", "resid",
"fitted.values", "fitted",
"fitted.values", "fitted", "na.omit",
"predict",
"update",
"anova",
Expand Down
44 changes: 44 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,54 @@

# Version 0.5-3
## New features
* Functionality to find unrestricted blocks of the model's psi matrix (latent variable covariance matrix). LKJ priors are assigned to these unrestricted blocks, alleviating the positive-definiteness issue described in the "Opaque priors" paper.

* Improved functionality for obtaining posterior modes via, e.g., summary(., postmode = TRUE).

* blavCompare() messaging is improved to clarify ELPD differences, and the function returns more output.

* Bug fix in two-level models with within-only observed variables; messaging added for unstable ppp.

* When extracting posterior draws via blavInspect(., "mcmc"), column names now match lavaan parameter names. For the old behavior involving Stan parameter names, use the argument add.labels = FALSE.

* Bugs from 0.5-2 are fixed.

## Bugs/glitches discovered after the release:
* Some models with exogenous covariates, fixed.x=TRUE, and missing data fail to converge and yield implausible parameter values (reported by DeAnne Hunter).


# Version 0.5-2
## New features
* This is a maintenance release, primarily adding the new array declaration syntax in Stan models (syntax that became available in the new version of rstan).

## Bugs/glitches discovered after the release:
* blavCompare() does not work with models that have meanstructure = FALSE (reported by Pedro Ribeiro).

* For target="jags", posterior modes cannot be obtained via postmode = TRUE (reported by Giada Venaruzzo).

* Models with both continuous and ordinal variables fail for cases where all ordinal variables are missing (reported by Sonja Winter).

* Certain equality constraints involving named parameters fail for target="stan" (reported by Niels Skovgaard-Olsen).

# Version 0.5-1
## New features
* Two-level models are now supported (for complete, continuous data) via the cluster argument.

## Bugs/glitches discovered after the release:
* For two-level model specification, the levels have to be labeled "within" and "between". This is more restrictive than lavaan specification.

* For target="jags", latent variable extraction via blavInspect(., "lvs") fails (reported by Joseph Saraceno).

# Version 0.4-8
## New features
* This is a maintenance release with bug fixes and some changes in compiler settings.

## Bugs/glitches discovered after the release:
* For certain models with residual correlations and/or correlated factors, the initial values under target='stan' lead to non-positive definite matrices (reported by Yuanyuan Hu).

* For models where a latent variable is regressed on an observed variable (lv ~ ov), the latent variable samples do not account for the mean of the observed variable (they are centered around 0 and off by a constant).


# Version 0.4-7
## New features
* This is primarily an update to address a C++14 vs C++17 compilation issue identified by CRAN.
Expand Down
39 changes: 25 additions & 14 deletions R/blav_compare.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,15 @@ blavCompare <- function(object1, object2, ...) {
res <- c(bf, object1@test[[1]]$stat, object2@test[[1]]$stat)
names(res) <- c("bf", "mll1", "mll2")

## FIXME? We already get case_lls in blav_fit_measures and should really
## only do it once. But, if we store it in the blavaan object, the size
## of that object can get much larger.
if(targ1 == "stan"){
if(targ1 == "stan" && blavInspect(object1, "meanstructure")){
ll1 <- loo::extract_log_lik(object1@external$mcmcout)
} else if(blavInspect(object1, "categorical") && lavopt1$test != "none"){
if("llnsamp" %in% names(lavopt1)){
cat("blavaan NOTE: These criteria involve likelihood approximations that may be imprecise.\n",
"You could try running the model again to see how much the criteria fluctuate.\n",
"You can also manually set llnsamp for greater accuracy (but also greater runtime).\n\n")
}
ll1 <- object1@external$casells
} else {
lavopt1$estimator <- "ML"
ll1 <- case_lls(object1@external$mcmcout, make_mcmc(object1@external$mcmcout),
Expand All @@ -29,20 +33,27 @@ blavCompare <- function(object1, object2, ...) {
cid1 <- rep(1:nchain1, each=niter1)
ref1 <- relative_eff(exp(ll1), chain_id = cid1)

if(targ2 == "stan"){
if(targ2 == "stan" && blavInspect(object2, "meanstructure")){
ll2 <- loo::extract_log_lik(object2@external$mcmcout)
} else if(blavInspect(object2, "categorical") && lavopt2$test != "none"){
if("llnsamp" %in% names(lavopt2)){
cat("blavaan NOTE: These criteria involve likelihood approximations that may be imprecise.\n",
"You could try running the model again to see how much the criteria fluctuate.\n",
"You can also manually set llnsamp for greater accuracy (but also greater runtime).\n\n")
}
ll2 <- object2@external$casells
} else {
lavopt2$estimator <- "ML"
ll2 <- case_lls(object2@external$mcmcout, make_mcmc(object2@external$mcmcout),
object2)
}
}
nchain2 <- blavInspect(object1, "n.chains")
niter2 <- nrow(ll2)/nchain2
cid2 <- rep(1:nchain2, each=niter2)
ref2 <- relative_eff(exp(ll2), chain_id = cid2)

loo1 <- loo(ll1, r_eff=ref1)
loo2 <- loo(ll2, r_eff=ref2)
loo1 <- loo(ll1, r_eff=ref1, ...)
loo2 <- loo(ll2, r_eff=ref2, ...)
waic1 <- waic(ll1); waic2 <- waic(ll2)

diff_loo <- loo_compare(loo1, loo2)
Expand All @@ -52,28 +63,28 @@ blavCompare <- function(object1, object2, ...) {
paste("object1: ", round( waic1$estimates[3,1], 3) ), "\n",
paste("object2: ", round( waic2$estimates[3,1], 3) ), "\n" )

cat("\nWAIC difference & SE: \n",
cat("\n ELPD difference & SE: \n",
sprintf("%8.3f", diff_waic[2, 1]),
sprintf("%8.3f", diff_waic[2, 2]), "\n")

cat("\nLOO estimates: \n",
paste("object1: ", round( loo1$estimates[3,1], 3) ), "\n",
paste("object2: ", round( loo2$estimates[3,1], 3) ), "\n" )

cat("\nLOO difference & SE: \n",
cat("\n ELPD difference & SE: \n",
sprintf("%8.3f", diff_loo[2, 1]),
sprintf("%8.3f", diff_loo[2, 2]), "\n\n")

cat("Laplace approximation to the log-Bayes factor\n(experimental; positive values favor object1):",
sprintf("%8.3f", bf), "\n\n")

looobj <- list(loo1$estimates, loo2$estimates)
waicobj <- list(waic1$estimates, waic2$estimates)
looobj <- list(loo1, loo2)
waicobj <- list(waic1, waic2)

res <- list(bf = res, loo = looobj,
diff_loo = diff_loo[2,1:2],
diff_loo = diff_loo,
waic = waicobj,
diff_waic = diff_waic[2,1:2])
diff_waic = diff_waic)

invisible(res)
}
Expand Down
105 changes: 58 additions & 47 deletions R/blav_model_loglik.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ get_ll_cont <- function(postsamp = NULL, # one posterior sample
} else {
implied <- lav_model_implied(lavmodel, delta = (lavmodel@parameterization == "delta"))
}

## check for missing, to see if we can easily get baseline ll for chisq
mis <- FALSE
if(any(is.na(unlist(lavdata@X)))){
Expand Down Expand Up @@ -344,58 +344,62 @@ get_ll_ord <- function(postsamp = NULL, # one posterior sample
ord.idx <- unique(TH.idx)
ord.idx <- ord.idx[ord.idx %in% obsidx[1:Nobs[mm]]]
nord <- length(ord.idx)
if(length(obsnum) > 0){
s12s22i <- cmat[ord.idx, obsnum] %*% chol2inv(chol(cmat[obsnum, obsnum]))
cov.ord <- cmat[ord.idx, ord.idx] - s12s22i %*% cmat[obsnum, ord.idx]
} else {
tmpll <- rep(0, r2 - r1 + 1)
cov.ord <- cmat[ord.idx, ord.idx, drop=FALSE]
mu.ord <- mnvec[ord.idx]
}

mm.in.group <- 1:lavmodel@nmat[g] + cumsum(c(0,lavmodel@nmat))[g]
mms <- lavmodel@GLIST[mm.in.group]
tau <- mms$tau

## thresholds for all cases
lowtau <- hitau <- matrix(NA, NROW(YX[r1:r2,]), length(ord.idx))
for(j in seq_len(nord)){
tmptau <- c(-Inf, tau[TH.idx == ord.idx[j]], Inf)
lowtau[,j] <- tmptau[YX[r1:r2,ord.idx[j]]]
hitau[,j] <- tmptau[YX[r1:r2,ord.idx[j]] + 1]
}
## only proceed if this missingness pattern has ordinal variables!
if(nord > 0){
if(length(obsnum) > 0){
s12s22i <- cmat[ord.idx, obsnum] %*% chol2inv(chol(cmat[obsnum, obsnum]))
cov.ord <- cmat[ord.idx, ord.idx] - s12s22i %*% cmat[obsnum, ord.idx]
} else {
tmpll <- rep(0, r2 - r1 + 1)
cov.ord <- cmat[ord.idx, ord.idx, drop=FALSE]
mu.ord <- mnvec[ord.idx]
}

for(i in r1:r2){
llidx <- i - r1 + 1
mm.in.group <- 1:lavmodel@nmat[g] + cumsum(c(0,lavmodel@nmat))[g]
mms <- lavmodel@GLIST[mm.in.group]
tau <- mms$tau

if(conditional){
catprob <- pnorm(hitau[llidx,], mean = mnvec[i,ord.idx], sd = sqrt(diag(cmat)[ord.idx])) -
pnorm(lowtau[llidx,], mean = mnvec[i,ord.idx], sd = sqrt(diag(cmat)[ord.idx]))
lsigi <- sum( dbinom(1, size = 1, prob = catprob, log = TRUE) )
tmpll[llidx] <- tmpll[llidx] + lsigi
} else {
if(length(obsnum) > 0){
mu.ord <- mnvec[ord.idx] + s12s22i %*% (YX[i,obsnum] - mnvec[obsnum])
}
## thresholds for all cases
lowtau <- hitau <- matrix(NA, NROW(YX[r1:r2,]), length(ord.idx))
for(j in seq_len(nord)){
tmptau <- c(-Inf, tau[TH.idx == ord.idx[j]], Inf)
lowtau[,j] <- tmptau[YX[r1:r2,ord.idx[j]]]
hitau[,j] <- tmptau[YX[r1:r2,ord.idx[j]] + 1]
}

if("llnsamp" %in% names(lavoptions)){
## run tmvnsim to approximate marginal logl
lsigi <- try(tmvnsim::tmvnsim(llnsamp, nord,
lower = lowtau[llidx,], upper = hitau[llidx,],
means = mu.ord, sigma = cov.ord), silent = TRUE)
if(!inherits(lsigi, 'try-error')) lsigi <- mean(lsigi$wts)
} else {
lsigi <- try(mnormt::sadmvn(lowtau[llidx,], hitau[llidx,], mean = mu.ord, varcov = cov.ord, abseps = 1e-2))
}
for(i in r1:r2){
llidx <- i - r1 + 1

if(inherits(lsigi, 'try-error')){
tmpll[llidx] <- NA
if(conditional){
catprob <- pnorm(hitau[llidx,], mean = mnvec[i,ord.idx], sd = sqrt(diag(cmat)[ord.idx])) -
pnorm(lowtau[llidx,], mean = mnvec[i,ord.idx], sd = sqrt(diag(cmat)[ord.idx]))
lsigi <- sum( dbinom(1, size = 1, prob = catprob, log = TRUE) )
tmpll[llidx] <- tmpll[llidx] + lsigi
} else {
tmpll[llidx] <- tmpll[llidx] + log(lsigi)
if(length(obsnum) > 0){
mu.ord <- mnvec[ord.idx] + s12s22i %*% (YX[i,obsnum] - mnvec[obsnum])
}

if("llnsamp" %in% names(lavoptions)){
## run tmvnsim to approximate marginal logl
lsigi <- try(tmvnsim::tmvnsim(llnsamp, nord,
lower = lowtau[llidx,], upper = hitau[llidx,],
means = mu.ord, sigma = cov.ord), silent = TRUE)
if(!inherits(lsigi, 'try-error')) lsigi <- mean(lsigi$wts)
} else {
lsigi <- try(mnormt::sadmvn(lowtau[llidx,], hitau[llidx,], mean = mu.ord, varcov = cov.ord, abseps = 1e-2))
}

if(inherits(lsigi, 'try-error')){
tmpll[llidx] <- NA
} else {
tmpll[llidx] <- tmpll[llidx] + log(lsigi)
}
}
}
}

if(casewise){
ll.samp[r1:r2] <- tmpll
} else {
Expand Down Expand Up @@ -538,7 +542,8 @@ case_lls <- function(lavjags = NULL,
lavmcmc = NULL,
lavobject = NULL,
conditional = FALSE,
thin = 1){
thin = 1,
debug = FALSE){

lavdata <- lavobject@Data

Expand All @@ -556,7 +561,13 @@ case_lls <- function(lavjags = NULL,
get_ll(lavmcmc[[j]][itnums[i],], lavobject,
casewise = TRUE, conditional = conditional)},
j = j, future.seed = TRUE)
tmpres[[j]] <- t(do.call("future_sapply", loop.args))

loopcom <- "future_sapply"
if(debug) {
loop.args$future.seed <- NULL
loopcom <- "sapply"
}
tmpres[[j]] <- t(do.call(loopcom, loop.args))
}

llmat <- do.call("rbind", tmpres)
Expand Down Expand Up @@ -587,7 +598,7 @@ llx_2l <- function(Lp, YX, mean_d, cidx){
} else {
loglik.x.b <- rep(0, nrow(mean_d))
}
loglik.x <- loglik.x.w.clus + loglik.x.b
loglik.x <- loglik.x.w.clus + as.numeric(loglik.x.b)

array(loglik.x, length(loglik.x))
}
Loading

0 comments on commit 56bc773

Please sign in to comment.