From 28dc3d27d8b5e1c4d036c8ab16a3bb801105470b Mon Sep 17 00:00:00 2001 From: Keefe-Murphy Date: Wed, 19 May 2021 19:25:52 +0100 Subject: [PATCH] Extended G_calibrate to use the Rmpfr & gmp libaries, use the full theoretical ranges for the uniroot intervals, and work when alpha=0. Major speed-up to G_expected when alpha=0. G_variance also computed more accurately and efficiently when alpha=0. Minor speed-up to G_priorDensity for non-zero discount. Minor .version_above() and ifelse fixes. Minor speed-ups to simulation of scores/loadings (via backsolve() improvement) and speed-ups to local/column/cluster shrinkage parameters (via pre-computation). Prepared CRAN release. --- DESCRIPTION | 4 +- R/Diagnostics.R | 4 +- R/FullConditionals.R | 167 +++++++++++++++++++++++++----------------- R/Gibbs_IFA.R | 8 +- R/Gibbs_IMIFA.R | 12 +-- R/Gibbs_MIFA.R | 10 ++- R/Gibbs_OMIFA.R | 12 +-- R/IMIFA.R | 4 +- R/PlottingFunctions.R | 22 +++--- inst/NEWS.md | 5 ++ man/G_moments.Rd | 28 ++++--- man/G_priorDensity.Rd | 2 +- man/IMIFA-package.Rd | 4 +- vignettes/IMIFA.Rmd | 2 +- 14 files changed, 170 insertions(+), 114 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ff610ef..a951829 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: IMIFA Type: Package -Date: 2020-12-29 +Date: 2021-05-24 Title: Infinite Mixtures of Infinite Factor Analysers and Related Models -Version: 2.1.5 +Version: 2.1.6 Authors@R: c(person("Keefe", "Murphy", email = "keefe.murphy@mu.ie", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-7709-3159")), person("Cinzia", "Viroli", email = "cinzia.viroli@unibo.it", role = "ctb", comment = c(ORCID = "0000-0002-3278-5266")), person("Isobel Claire", "Gormley", email = "claire.gormley@ucd.ie", role = "ctb", comment = c(ORCID = "0000-0001-7713-681X"))) diff --git a/R/Diagnostics.R b/R/Diagnostics.R index b7d5a97..c3213f2 100644 --- a/R/Diagnostics.R +++ b/R/Diagnostics.R @@ -355,8 +355,8 @@ get_IMIFA_results.IMIFA <- function(sims = NULL, burnin 
= 0L, thinning = GQ.temp2 <- list(AICMs = aicm, BICMs = bicm, DICs = dic) clust.ind <- !any(is.element(method, c("FA", "IFA")), all(is.element(method, c("MFA", "MIFA")), G == 1)) - sw.mx <- ifelse(clust.ind, sw["mu.sw"], TRUE) - sw.px <- ifelse(clust.ind, sw["psi.sw"], TRUE) + sw.mx <- !clust.ind || sw["mu.sw"] + sw.px <- !clust.ind || sw["psi.sw"] if(!inf.Q) { Q <- if(length(n.fac) > 1) Q else n.fac diff --git a/R/FullConditionals.R b/R/FullConditionals.R index b3e6b63..56c334e 100644 --- a/R/FullConditionals.R +++ b/R/FullConditionals.R @@ -16,20 +16,20 @@ u.eta <- diag(Q) + crossprod(load.psi, lmat) u.eta <- if(Q1) sqrt(u.eta) else .chol(u.eta) mu.eta <- c.data %*% (load.psi %*% if(Q1) 1/(u.eta * u.eta) else chol2inv(u.eta)) - mu.eta + t(backsolve(u.eta, matrnorm(Q, N))) + mu.eta + t(backsolve(u.eta, matrnorm(Q, N), k=Q)) } # Loadings .sim_load <- function(l.sigma, Q, c.data, eta, psi.inv, EtE, Q1) { u.load <- l.sigma + psi.inv * EtE u.load <- if(Q1) sqrt(u.load) else .chol(u.load) - psi.inv * (if(Q1) 1/(u.load * u.load) else chol2inv(u.load)) %*% crossprod(eta, c.data) + backsolve(u.load, stats::rnorm(Q)) + psi.inv * (if(Q1) 1/(u.load * u.load) else chol2inv(u.load)) %*% crossprod(eta, c.data) + backsolve(u.load, stats::rnorm(Q), k=Q) } .sim_load_s <- function(Q, c.data, eta, phi, tau, psi.inv, EtE, Q1, sigma = 1L) { u.load <- diag(phi * tau * sigma, Q) + psi.inv * EtE u.load <- if(Q1) sqrt(u.load) else .chol(u.load) - psi.inv * (if(Q1) 1/(u.load * u.load) else chol2inv(u.load)) %*% crossprod(eta, c.data) + backsolve(u.load, stats::rnorm(Q)) + psi.inv * (if(Q1) 1/(u.load * u.load) else chol2inv(u.load)) %*% crossprod(eta, c.data) + backsolve(u.load, stats::rnorm(Q), k=Q) } # Uniquenesses @@ -63,22 +63,22 @@ } # Local Shrinkage - .sim_phi <- function(Q, P, nu1, nu2, tau, load.2, sigma = 1L) { - matrix(stats::rgamma(P * Q, shape=nu1 + 0.5, rate=nu2 + (sigma * sweep(load.2, 2L, tau, FUN="*", check.margin=FALSE))/2), nrow=P, ncol=Q) + .sim_phi <- function(Q, P, 
nu1.5, nu2, tau, load.2, sigma = 1L) { + matrix(stats::rgamma(P * Q, shape=nu1.5, rate=nu2 + (sigma * sweep(load.2, 2L, tau, FUN="*", check.margin=FALSE))/2), nrow=P, ncol=Q) } # Column Shrinkage - .sim_delta1 <- function(Q, P, alpha.d1, delta.1, beta.d1, tau, sum.term, sigma = 1L) { - stats::rgamma(1, shape=alpha.d1 + P * Q/2, rate=beta.d1 + (sigma * 0.5)/delta.1 * tau %*% sum.term) + .sim_delta1 <- function(Q, P.5, alpha.d1, delta.1, beta.d1, tau, sum.term, sigma = 1L) { + stats::rgamma(1, shape=alpha.d1 + P.5 * Q, rate=beta.d1 + (sigma * 0.5)/delta.1 * tau %*% sum.term) } - .sim_deltak <- function(Q, P, k, alpha.d2, beta.d2, delta.k, tau.kq, sum.term.kq, sigma = 1L) { - stats::rgamma(1, shape=alpha.d2 + P/2 * (Q - k + 1L), rate=beta.d2 + (sigma * 0.5)/delta.k * tau.kq %*% sum.term.kq) + .sim_deltak <- function(Q, P.5, k, alpha.d2, beta.d2, delta.k, tau.kq, sum.term.kq, sigma = 1L) { + stats::rgamma(1, shape=alpha.d2 + P.5 * (Q - k + 1L), rate=beta.d2 + (sigma * 0.5)/delta.k * tau.kq %*% sum.term.kq) } # Cluster Shrinkage - .sim_sigma <- function(G, P, Qs, rho1, rho2, sum.terms, tau) { - stats::rgamma(G, shape=rho1 + (P * Qs)/2, rate=rho2 + mapply("%*%", sum.terms, tau)/2) + .sim_sigma <- function(G, P.5, Qs, rho1, rho2, sum.terms, tau) { + stats::rgamma(G, shape=rho1 + P.5 * Qs, rate=rho2 + mapply("%*%", sum.terms, tau)/2) } # Mixing Proportions @@ -575,8 +575,8 @@ exp.seq <- lapply(ML, function(i) exp.Q1[i] * exp.Qk[i]^Qseq) check <- !vapply(exp.seq, is.unsorted, logical(1L)) } - exp.seq <- if(length(exp.seq) == 1) exp.seq[[1L]] else exp.seq - res <- list(expectation = exp.seq, valid = if(Q < 2) TRUE else check) + exp.seq <- if(length(exp.seq) == 1) exp.seq[[1L]] else exp.seq + res <- list(expectation = exp.seq, valid = Q < 2 || check) attr(res, "Warning") <- WX return(res) } @@ -1044,24 +1044,26 @@ #' @param N The sample size. #' @param alpha The concentration parameter. 
Must be specified (though not for \code{G_calibrate}) and must be strictly greater than \code{-discount}. The case \code{alpha=0} is accommodated. When \code{discount} is negative \code{alpha} must be a positive integer multiple of \code{abs(discount)}. See \strong{Details} for behaviour for \code{G_calibrate}. #' @param discount The discount parameter for the Pitman-Yor process. Must be less than 1, but typically lies in the interval [0, 1). Defaults to 0 (i.e. the Dirichlet process). When \code{discount} is negative \code{alpha} must be a positive integer multiple of \code{abs(discount)}. See \strong{Details} for behaviour for \code{G_calibrate}. -#' @param MPFR Logical indicating whether the high-precision libraries \code{\link[Rmpfr]{Rmpfr}} and \code{gmp} are invoked, at the expense of run-time. Defaults to \code{TRUE} and \strong{must} be \code{TRUE} for \code{\link{G_expected}} when \code{alpha=0} and \code{\link{G_variance}} when \code{discount} is non-zero. See \strong{\code{Note}}. -#' @param EG The prior expected number of clusters. Must exceed \code{1}. +#' @param MPFR Logical indicating whether the high-precision libraries \code{\link[Rmpfr]{Rmpfr}} and \code{gmp} are invoked, at the expense of run-time. Defaults to \code{TRUE} and \strong{must} be \code{TRUE} for \code{G_expected} when \code{alpha=0} or \code{G_variance} when \code{discount} is non-zero. For \code{G_calibrate}, it is \emph{strongly recommended} to use \code{MPFR=TRUE} when \code{discount} is non-zero and strictly necessary when \code{alpha=0} is supplied. See \strong{\code{Note}}. +#' @param EG The prior expected number of clusters. Must exceed \code{1} and be less than \code{N}. #' @param ... Additional arguments passed to \code{\link[stats]{uniroot}}, e.g. \code{maxiter}. #' #' @details All arguments are vectorised. Users can also consult \code{\link{G_priorDensity}} in order to solicit sensible priors. 
#' -#' For \code{G_calibrate}, \strong{only one} of \code{alpha} or \code{discount} can be supplied, and the function elicits a value for the opposing parameter which achieves the desired expected number of clusters \code{EG} for the given sample size \code{N}. By default, a value for \code{alpha} subject to \code{discount=0} (i.e. the Dirichlet process) is elicited. See \strong{Examples} below. +#' For \code{G_calibrate}, \strong{only one} of \code{alpha} or \code{discount} can be supplied, and the function elicits a value for the opposing parameter which achieves the desired expected number of clusters \code{EG} for the given sample size \code{N}. By default, a value for \code{alpha} subject to \code{discount=0} (i.e. the Dirichlet process) is elicited. Note that \code{alpha} may not be a positive integer multiple of \code{discount} as it should be if \code{discount} is negative. See \strong{Examples} below. #' @return The expected number of clusters under the specified prior conditions (\code{G_expected}), or the variance of the number of clusters (\code{G_variance}), or the concentration parameter \code{alpha} \strong{or} \code{discount} parameter achieving a particular expected number of clusters (\code{G_calibrate}). #' @keywords utility #' @export #' @name G_moments #' @rdname G_moments #' -#' @note \code{G_variance} requires use of the \code{\link[Rmpfr]{Rmpfr}} and \code{gmp} libraries for non-zero \code{discount} values. \code{G_expected} requires these libraries only for the \code{alpha=0} case. Despite the high precision arithmetic used, the functions can still be unstable for small values of \code{discount}. See the argument \code{MPFR}. +#' @note \code{G_variance} requires use of the \code{\link[Rmpfr]{Rmpfr}} and \code{gmp} libraries for non-zero \code{discount} values. \code{G_expected} requires these libraries only for the \code{alpha=0} case. 
These libraries are \emph{strongly recommended} (but they are not required) for \code{G_calibrate} when \code{discount} is non-zero, but they are required when \code{alpha=0} is supplied. Despite the high precision arithmetic used, the functions can still be unstable for large \code{N} and/or extreme values of \code{alpha} and/or \code{discount}. See the argument \code{MPFR}. #' #' @seealso \code{\link{G_priorDensity}}, \code{\link[Rmpfr]{Rmpfr}}, \code{\link[stats]{uniroot}} #' @references De Blasi, P., Favaro, S., Lijoi, A., Mena, R. H., Prunster, I., and Ruggiero, M. (2015) Are Gibbs-type priors the most natural generalization of the Dirichlet process?, \emph{IEEE Transactions on Pattern Analysis and Machine Intelligence}, 37(2): 212-229. #' +#' Yamato, H. and Shibuya, M. (2000) Moments of some statistics of Pitman sampling formula, \emph{Bulletin of Informatics and Cybernetics}, 32(1): 1-10. +#' #' @author Keefe Murphy - <\email{keefe.murphy@@mu.ie}> #' @usage #' G_expected(N, @@ -1087,54 +1089,48 @@ #' # matplot(PY, type="l", xlab="N", ylab="G") #' #' # Other special cases of the PYP are also facilitated -#' # G_expected(N=50, alpha=c(27.1401, 0), discount=c(-27.1401/100, 0.8054447)) -#' # G_variance(N=50, alpha=c(27.1401, 0), discount=c(-27.1401/100, 0.8054447)) +#' # G_expected(N=50, alpha=c(27.1401, 0), discount=c(-27.1401/100, 0.8054448)) +#' # G_variance(N=50, alpha=c(27.1401, 0), discount=c(-27.1401/100, 0.8054448)) #' -#' # Elicit values for alpha +#' # Elicit values for alpha under a DP prior #' G_calibrate(N=50, EG=25) -#' G_calibrate(N=50, EG=25, discount=c(0.25, 0.7300045)) #' -#' # Elicit values for discount -#' G_calibrate(N=50, EG=25, alpha=c(12.21619, 1)) +#' # Elicit values for alpha under a PYP prior +#' # require("Rmpfr") +#' # G_calibrate(N=50, EG=25, discount=c(-27.1401/100, 0.25, 0.7300045)) +#' +#' # Elicit values for discount under a PYP prior +#' # G_calibrate(N=50, EG=25, alpha=c(12.21619, 1, 0), maxiter=2000) G_expected <- 
Vectorize(function(N, alpha, discount = 0, MPFR = TRUE) { if(!all(is.numeric(N), is.numeric(discount), is.numeric(alpha))) stop("All inputs must be numeric", call.=FALSE) if(discount >= 1) stop("'discount' must be less than 1", call.=FALSE) if(discount > 0 && - alpha <= - discount) stop("'alpha' must be strictly greater than -discount", call.=FALSE) + alpha <= - discount) stop("'alpha' must be strictly greater than -discount", call.=FALSE) if(discount < 0 && (alpha <= 0 || !.IntMult(alpha, discount))) stop("'alpha' must be a positive integer multiple of 'abs(discount)' when 'discount' is negative", call.=FALSE) - if(alpha == 0 && discount <= 0) stop("'discount' must be strictly positive when 'alpha=0", call.=FALSE) + if(alpha == 0 && discount <= 0) stop("'discount' must be strictly positive when 'alpha'=0", call.=FALSE) if(alpha == 0 && !isTRUE(MPFR)) stop("'MPFR' must be TRUE when 'alpha' == 0", call.=FALSE) igmp <- isNamespaceLoaded("Rmpfr") if(mpfrind <- (isTRUE(MPFR) && - suppressMessages(requireNamespace("Rmpfr", quietly=TRUE)) && - .version_above("gmp", "0.5-4"))) { + suppressMessages(requireNamespace("Rmpfr", quietly=TRUE)) && + .version_above("gmp", "0.5-4"))) { if(isFALSE(igmp)) { on.exit(.detach_pkg("Rmpfr")) on.exit(.detach_pkg("gmp"), add=TRUE) } alpha <- Rmpfr::mpfr(alpha, precBits=256) - } + } else if(isTRUE(MPFR)) warning("'Rmpfr' package not installed\n", call.=FALSE, immediate.=TRUE) if(alpha == 0) { if(mpfrind) { - tmp <- sum(log(alpha + seq_len(N - 1L))) - ldisc <- log(discount) - res <- 0 - for(k in seq_len(N)) { - kseq <- seq_len(k) - res <- res + k * exp(sum(log(alpha + discount * kseq[-k])) - tmp - - k * ldisc) * sum((-1L)^kseq * gmp::chooseZ(n=k, k=kseq) - * Rmpfr::pochMpfr(-kseq * discount, N) / gmp::factorialZ(k)) - } - return(gmp::asNumeric(res)) - } else stop("'Rmpfr' must be installed when 'alpha'=0", call.=FALSE) + return(gmp::asNumeric(Rmpfr::pochMpfr(discount + 1, N - 1L)/gamma(Rmpfr::mpfr(N, precBits=256)))) + } else stop("'Rmpfr' 
must be installed when 'alpha'=0", call.=FALSE) } if(discount == 0) { exp <- alpha * (digamma(alpha + N) - digamma(alpha)) - #exp <- sum(alpha/(alpha + 0L:(N - 1L))) + #exp <- sum(alpha/(alpha + 0L:(N - 1L))) if(mpfrind) { gmp::asNumeric(exp) } else { @@ -1145,7 +1141,7 @@ if(mpfrind) { gmp::asNumeric(adx * Rmpfr::pochMpfr(alpha + discount, N)/Rmpfr::pochMpfr(alpha, N) - adx) } else { - adx * (prod(discount/(alpha + 0L:(N - 1L)) + 1L) - 1L) + adx * (prod(discount/(alpha + 0L:(N - 1L)) + 1L) - 1L) } } }) @@ -1163,34 +1159,40 @@ is.numeric(alpha))) stop("All inputs must be numeric", call.=FALSE) if(discount >= 1) stop("'discount' must be less than 1", call.=FALSE) if(discount > 0 && - alpha <= - discount) stop("'alpha' must be strictly greater than -discount", call.=FALSE) + alpha <= - discount) stop("'alpha' must be strictly greater than -discount", call.=FALSE) if(discount < 0 && (alpha <= 0 || !.IntMult(alpha, discount))) stop("'alpha' must be a positive integer multiple of 'abs(discount)' when 'discount' is negative", call.=FALSE) - #if(alpha == 0) { warning("'alpha'=0 case not yet implemented", call.=FALSE, immediate.=TRUE); return(Inf) } - if(discount != 0 && !isTRUE(MPFR)) stop("'MPFR' must be TRUE when 'discount' is non-zero", call.=FALSE) + if(alpha == 0 && discount <= 0) stop("'discount' must be strictly positive when 'alpha'=0", call.=FALSE) + if(discount != 0 && !isTRUE(MPFR)) stop("'MPFR' must be TRUE when 'discount' is non-zero", call.=FALSE) igmp <- isNamespaceLoaded("Rmpfr") if(mpfrind <- (isTRUE(MPFR) && - suppressMessages(requireNamespace("Rmpfr", quietly=TRUE)) && + suppressMessages(requireNamespace("Rmpfr", quietly=TRUE)) && .version_above("gmp", "0.5-4"))) { if(isFALSE(igmp)) { on.exit(.detach_pkg("Rmpfr")) on.exit(.detach_pkg("gmp"), add=TRUE) } - alpha <- Rmpfr::mpfr(alpha, precBits=256) - } else if(discount != 0) stop("'Rmpfr' package not installed", call.=FALSE) + alpha <- Rmpfr::mpfr(alpha, precBits=256) + } else { + if(discount != 0) { 
stop("'Rmpfr' package not installed", call.=FALSE) + } else if(isTRUE(MPFR)) warning("'Rmpfr' package not installed\n", call.=FALSE, immediate.=TRUE) + } alpha2 <- alpha * alpha if(discount == 0) { var <- alpha * (digamma(alpha + N) - digamma(alpha)) if(mpfrind) { alpha <- gmp::asNumeric(alpha) - gmp::asNumeric(var + alpha2 * (trigamma(alpha + N) - trigamma(alpha))) + gmp::asNumeric(var + alpha2 * (trigamma(alpha + N) - trigamma(alpha))) } else { var + alpha2 * (trigamma(alpha + N) - trigamma(alpha)) } + } else if(alpha == 0) { + poch.1 <- gamma(Rmpfr::mpfr(N, precBits=256)) + subterm <- Rmpfr::pochMpfr(discount + 1, N - 1L)/poch.1 + gmp::asNumeric(Rmpfr::pochMpfr(2 * discount, N)/(discount * poch.1) - subterm * (1L + subterm)) } else { - alpha <- if(alpha == 0) .Machine$double.eps else alpha sum.ad <- alpha + discount poch.a <- Rmpfr::pochMpfr(alpha, N) poch.ad <- Rmpfr::pochMpfr(sum.ad, N) @@ -1204,38 +1206,72 @@ #' @usage #' G_calibrate(N, #' EG, -#' discount = 0, #' alpha = NULL, +#' discount = 0, +#' MPFR = TRUE, #' ...) #' @export - G_calibrate <- Vectorize(function(N, EG, discount = 0, alpha = NULL, ...) { + G_calibrate <- Vectorize(function(N, EG, alpha = NULL, discount = 0, MPFR = TRUE, ...) 
{ if(!all(is.numeric(N), is.numeric(discount), is.null(alpha) || is.numeric(alpha), is.numeric(EG))) stop("All inputs must be numeric", call.=FALSE) if(discount >= 1) stop("'discount' must be less than 1", call.=FALSE) - if(EG <= 1) stop("'EG' must be greater than 1", call.=FALSE) - RFA <- function(N, alpha, discount) prod(1 + discount/(alpha + 0L:(N - 1L))) + if(EG <= 1) stop("'EG' must be greater than 1", call.=FALSE) + if(EG >= N) stop("'EG' must be less than 'N'", call.=FALSE) + igmp <- isNamespaceLoaded("Rmpfr") + if(mpfrind <- (isTRUE(MPFR) && + suppressMessages(requireNamespace("Rmpfr", quietly=TRUE)) && + .version_above("gmp", "0.5-4"))) { + if(isFALSE(igmp)) { + on.exit(.detach_pkg("Rmpfr")) + on.exit(.detach_pkg("gmp"), add=TRUE) + } + } else if(isTRUE(MPFR)) warning("'Rmpfr' package not installed\n", call.=FALSE, immediate.=TRUE) + if(isTRUE(mpfrind) && + (!is.null(alpha) || discount != 0)) { + if(!is.null(alpha) && alpha == 0) { + p1 <- gamma(Rmpfr::mpfr(N, precBits=256)) + RFA <- function(N, discount, p1) { + gmp::asNumeric(Rmpfr::pochMpfr(discount + 1, N - 1L)/p1) + } + } else { + RFA <- function(N, alpha, discount) { + x <- gmp::asNumeric((Rmpfr::pochMpfr(alpha + discount, N)/Rmpfr::pochMpfr(alpha, N) - 1) * alpha/discount) + ifelse(x == 0, N, ifelse(is.infinite(x) && x < 0, 1, x)) + } + } + } else RFA <- function(N, alpha, discount) alpha/discount * (prod(1 + discount/(alpha + 0L:(N - 1L))) - 1) + if(is.null(alpha)) { - if(discount == 0) { - X <- try(suppressWarnings(stats::uniroot(function(x) sum(x/(x + 0L:(N - 1L))) - EG, interval=c(0.00001, 10000), ...)), silent=TRUE) + if(discount == 0) { + inter <- c(.Machine$double.eps, .Machine$double.xmax) + X <- try(suppressWarnings(stats::uniroot(function(x) sum(x/(x + 0L:(N - 1L))) - EG, interval=inter, ...)), silent=TRUE) } else { - X <- try(suppressWarnings(stats::uniroot(function(x) x/discount * (RFA(N, x, discount) - 1) - EG, interval=c(-discount + 0.00001, 10000), ...)), silent=TRUE) + inter <- 
if(isTRUE(mpfrind)) c(-discount + .Machine$double.eps, .Machine$double.xmax) else c(-discount + 0.000001, 100000) + X <- try(suppressWarnings(stats::uniroot(function(x) RFA(N, x, discount) - EG, interval=inter, ...)), silent=TRUE) } - if(inherits(X, "try-error")) { warning("uniroot failed to elicit an alpha value\n", call.=FALSE, immediate.=TRUE) - Y <- stats::setNames(NA, "alpha") - } else Y <- stats::setNames(X$root, "alpha") + if(inherits(X, "try-error")) { warning(paste0("uniroot failed to elicit a discount value", ifelse(isFALSE(MPFR), ": consider setting MPFR=TRUE\n","\n")), call.=FALSE, immediate.=TRUE) + Y <- stats::setNames(NA, "alpha") + } else Y <- stats::setNames(X$root, "alpha") } else if(missing(discount) || discount == 0) { - if(alpha == 0) warning("'alpha'=0 case not yet implemented\n", call.=FALSE, immediate.=TRUE) - X <- try(suppressWarnings(stats::uniroot(function(x) alpha/x * (RFA(N, alpha, x) - 1) - EG, interval=c(-10000, 1 - 0.00001), ...)), silent=TRUE) - if(inherits(X, "try-error")) { warning("uniroot failed to elicit a discount value\n", call.=FALSE, immediate.=TRUE) + if(alpha == 0) { + if(!isTRUE(MPFR)) stop("'MPFR' must be TRUE when 'alpha' == 0", call.=FALSE) + inter <- c(.Machine$double.eps, 1 - .Machine$double.eps) + X <- try(suppressWarnings(stats::uniroot(function(x) RFA(N, x, p1) - EG, interval=inter, ...)), silent=TRUE) + } else { + inter <- c(-.Machine$double.xmax, 1 - .Machine$double.eps) + X <- try(suppressWarnings(stats::uniroot(function(x) RFA(N, alpha, x) - EG, interval=inter, ...)), silent=TRUE) + } + if(inherits(X, "try-error")) { warning(paste0("uniroot failed to elicit a discount value", ifelse(isFALSE(MPFR), ": consider setting MPFR=TRUE\n","\n")), call.=FALSE, immediate.=TRUE) Y <- stats::setNames(NA, "discount") } else Y <- stats::setNames(X$root, "discount") } else stop("'alpha' and 'discount' cannot both be supplied", call.=FALSE) dots <- list(...) 
maxiter <- ifelse(length(dots) > 0 && any(names(dots) %in% "maxiter"), dots$maxiter, 1000) - if(X$iter == maxiter) warning(paste0("uniroot failed to converge in ", maxiter, " iterations\n"), call.=FALSE) + if(!inherits(X, "try-error") && + X$iter == maxiter) warning(paste0("uniroot failed to converge in ", maxiter, " iterations\n"), call.=FALSE) return(Y) - }, vectorize.args = c("N", "EG", "discount", "alpha")) + }, vectorize.args = c("N", "EG", "alpha", "discount", "MPFR")) # Print functions #' @method print IMIFA @@ -2392,8 +2428,7 @@ .version_above <- function(pkg, than) { pkg <- as.character(utils::packageVersion(pkg)) - test <- ifelse(test <- identical(pkg, than), test, as.logical(utils::compareVersion(pkg, than))) - return(test) + identical(pkg, than) || (utils::compareVersion(pkg, than) >= 0) } .which0 <- function(x) which(x == 0) diff --git a/R/Gibbs_IFA.R b/R/Gibbs_IFA.R index 902fed7..f97507c 100644 --- a/R/Gibbs_IFA.R +++ b/R/Gibbs_IFA.R @@ -38,6 +38,8 @@ Q.star <- Q Q.store <- vector("integer", n.store) Q.large <- Q.big <- FALSE + nu1.5 <- nu1 + 0.5 + P.5 <- P/2 mu.sigma <- 1/sigma.mu mu.zero <- as.numeric(mu.zero) @@ -89,11 +91,11 @@ # Shrinkage if(Q0) { load.2 <- lmat * lmat - phi <- .sim_phi(Q=Q, P=P, nu1=nu1, nu2=nu2, tau=tau, load.2=load.2) + phi <- .sim_phi(Q=Q, P=P, nu1.5=nu1.5, nu2=nu2, tau=tau, load.2=load.2) sum.term <- colSums2(phi * load.2) for(k in seq_len(Q)) { - delta[k] <- if(k > 1) .sim_deltak(alpha.d2=alpha.d2, beta.d2=beta.d2, delta.k=delta[k], Q=Q, P=P, k=k, - tau.kq=tau[k:Q], sum.term.kq=sum.term[k:Q]) else .sim_delta1(Q=Q, P=P, tau=tau, sum.term=sum.term, + delta[k] <- if(k > 1) .sim_deltak(alpha.d2=alpha.d2, beta.d2=beta.d2, delta.k=delta[k], Q=Q, P.5=P.5, k=k, + tau.kq=tau[k:Q], sum.term.kq=sum.term[k:Q]) else .sim_delta1(Q=Q, P.5=P.5, tau=tau, sum.term=sum.term, alpha.d1=ifelse(Q1, alpha.d2, alpha.d1), beta.d1=ifelse(Q1, beta.d2, beta.d1), delta.1=delta[1L]) tau <- cumprod(delta) } diff --git a/R/Gibbs_IMIFA.R b/R/Gibbs_IMIFA.R 
index e056451..3cb2823 100644 --- a/R/Gibbs_IMIFA.R +++ b/R/Gibbs_IMIFA.R @@ -48,6 +48,8 @@ G.store <- vector("integer", n.store) act.store <- G.store pi.alpha <- cluster$pi.alpha + nu1.5 <- nu1 + 0.5 + P.5 <- P/2 if(learn.alpha) { alpha.store <- ll.store alpha.shape <- a.hyper[1L] @@ -82,7 +84,7 @@ sig.mu.sqrt <- sqrt(sigma.mu) z <- cluster$z nn <- tabulate(z, nbins=trunc.G) - nn0 <- nn > 0 + nn0 <- nn > 0 nn.ind <- which(nn > 0) G.non <- length(nn.ind) Q.star <- Q @@ -202,7 +204,7 @@ # Shrinkage if(any(Q0)) { load.2 <- lapply(lmat[Gs], .power2) - phi[Gs] <- lapply(Gs, function(g) if(n0q0[g]) .sim_phi(Q=Qs[g], P=P, nu1=nu1, nu2=nu2, tau=tau[[g]], + phi[Gs] <- lapply(Gs, function(g) if(n0q0[g]) .sim_phi(Q=Qs[g], P=P, nu1.5=nu1.5, nu2=nu2, tau=tau[[g]], load.2=load.2[[g]], sigma=MGPsig[g]) else .sim_phi_p(Q=Qs[g], P=P, nu1=nu1, nu2=nu2)) sum.terms <- lapply(Gs, function(g) if(n0q0[g]) colSums2(phi[[g]] * load.2[[g]])) for(g in Gs) { @@ -210,8 +212,8 @@ Q1g <- Q1[g] if(n0q0[g]) { for(k in seq_len(Qg)) { - delta[[g]][k] <- if(k > 1) .sim_deltak(alpha.d2=alpha.d2, beta.d2=beta.d2, delta.k=delta[[g]][k], tau.kq=tau[[g]][k:Qg], P=P, Q=Qg, - k=k, sum.term.kq=sum.terms[[g]][k:Qg], sigma=MGPsig[g]) else .sim_delta1(Q=Qg, P=P, tau=tau[[g]], sum.term=sum.terms[[g]], + delta[[g]][k] <- if(k > 1) .sim_deltak(alpha.d2=alpha.d2, beta.d2=beta.d2, delta.k=delta[[g]][k], tau.kq=tau[[g]][k:Qg], P.5=P.5, Q=Qg, + k=k, sum.term.kq=sum.terms[[g]][k:Qg], sigma=MGPsig[g]) else .sim_delta1(Q=Qg, P.5=P.5, tau=tau[[g]], sum.term=sum.terms[[g]], alpha.d1=ifelse(Q1g, alpha.d2, alpha.d1), beta.d1=ifelse(Q1g, beta.d2, beta.d1), delta.1=delta[[g]][1L], sigma=MGPsig[g]) tau[[g]] <- cumprod(delta[[g]]) } @@ -226,7 +228,7 @@ nnX <- n0q0[Gs] n0Gq <- which(nnX) nGq0 <- length(n0Gq) - MGPsig[n0Gq] <- .sim_sigma(G=nGq0, P=P, Qs=Qs[n0Gq], rho1=rho1, rho2=rho2, sum.terms=sum.terms[n0Gq], tau=tau[n0Gq]) + MGPsig[n0Gq] <- .sim_sigma(G=nGq0, P.5=P.5, Qs=Qs[n0Gq], rho1=rho1, rho2=rho2, 
sum.terms=sum.terms[n0Gq], tau=tau[n0Gq]) MGPsig[which(!nnX)] <- .sim_sigma_p(G=G - nGq0, rho1=rho1, rho2=rho2) } } diff --git a/R/Gibbs_MIFA.R b/R/Gibbs_MIFA.R index 078cbc1..9b21509 100644 --- a/R/Gibbs_MIFA.R +++ b/R/Gibbs_MIFA.R @@ -43,6 +43,8 @@ Q.store <- matrix(0L, nrow=G, ncol=n.store) Q.large <- Q.big <- Q.bigs <- FALSE err.z <- z.err <- FALSE + nu1.5 <- nu1 + 0.5 + P.5 <- P/2 mu.sigma <- 1/sigma.mu sig.mu.sqrt <- sqrt(sigma.mu) @@ -162,7 +164,7 @@ # Shrinkage if(any(Q0)) { load.2 <- lapply(lmat, .power2) - phi <- lapply(Gseq, function(g) if(n0q0[g]) .sim_phi(Q=Qs[g], P=P, nu1=nu1, nu2=nu2, tau=tau[[g]], + phi <- lapply(Gseq, function(g) if(n0q0[g]) .sim_phi(Q=Qs[g], P=P, nu1.5=nu1.5, nu2=nu2, tau=tau[[g]], load.2=load.2[[g]], sigma=MGPsig[g]) else .sim_phi_p(Q=Qs[g], P=P, nu1=nu1, nu2=nu2)) sum.terms <- lapply(Gseq, function(g) if(n0q0[g]) colSums2(phi[[g]] * load.2[[g]])) for(g in Gseq) { @@ -170,8 +172,8 @@ Q1g <- Q1[g] if(n0q0[g]) { for(k in seq_len(Qg)) { - delta[[g]][k] <- if(k > 1) .sim_deltak(alpha.d2=alpha.d2[g], beta.d2=beta.d2, delta.k=delta[[g]][k], tau.kq=tau[[g]][k:Qg], P=P, Q=Qg, - k=k, sum.term.kq=sum.terms[[g]][k:Qg], sigma=MGPsig[g]) else .sim_delta1(Q=Qg, P=P, tau=tau[[g]], sum.term=sum.terms[[g]], + delta[[g]][k] <- if(k > 1) .sim_deltak(alpha.d2=alpha.d2[g], beta.d2=beta.d2, delta.k=delta[[g]][k], tau.kq=tau[[g]][k:Qg], P.5=P.5, Q=Qg, + k=k, sum.term.kq=sum.terms[[g]][k:Qg], sigma=MGPsig[g]) else .sim_delta1(Q=Qg, P.5=P.5, tau=tau[[g]], sum.term=sum.terms[[g]], alpha.d1=ifelse(Q1g, alpha.d2[g], alpha.d1[g]), beta.d1=ifelse(Q1g, beta.d2, beta.d1), delta.1=delta[[g]][1L], sigma=MGPsig[g]) tau[[g]] <- cumprod(delta[[g]]) } @@ -184,7 +186,7 @@ } if(cluster.shrink) { nGq0 <- sum(n0q0) - MGPsig[n0q0] <- .sim_sigma(G=nGq0, P=P, Qs=Qs[n0q0], rho1=rho1, rho2=rho2, sum.terms=sum.terms[n0q0], tau=tau[n0q0]) + MGPsig[n0q0] <- .sim_sigma(G=nGq0, P.5=P.5, Qs=Qs[n0q0], rho1=rho1, rho2=rho2, sum.terms=sum.terms[n0q0], tau=tau[n0q0]) MGPsig[!n0q0] <- 
.sim_sigma_p(G=G - nGq0, rho1=rho1, rho2=rho2) } } diff --git a/R/Gibbs_OMIFA.R b/R/Gibbs_OMIFA.R index 9e418ad..96047e1 100644 --- a/R/Gibbs_OMIFA.R +++ b/R/Gibbs_OMIFA.R @@ -44,12 +44,14 @@ Q.large <- Q.big <- Q.bigs <- FALSE err.z <- z.err <- FALSE G.store <- vector("integer", n.store) + nu1.5 <- nu1 + 0.5 + P.5 <- P/2 mu.sigma <- 1/sigma.mu sig.mu.sqrt <- sqrt(sigma.mu) z <- cluster$z nn <- tabulate(z, nbins=G) - nn0 <- nn > 0 + nn0 <- nn > 0 nn.ind <- which(nn0) G.non <- length(nn.ind) Q.star <- Q @@ -171,7 +173,7 @@ # Shrinkage if(any(Q0)) { load.2 <- lapply(lmat, .power2) - phi <- lapply(Gseq, function(g) if(n0q0[g]) .sim_phi(Q=Qs[g], P=P, nu1=nu1, nu2=nu2, tau=tau[[g]], + phi <- lapply(Gseq, function(g) if(n0q0[g]) .sim_phi(Q=Qs[g], P=P, nu1.5=nu1.5, nu2=nu2, tau=tau[[g]], load.2=load.2[[g]], sigma=MGPsig[g]) else .sim_phi_p(Q=Qs[g], P=P, nu1=nu1, nu2=nu2)) sum.terms <- lapply(Gseq, function(g) if(n0q0[g]) colSums2(phi[[g]] * load.2[[g]])) for(g in Gseq) { @@ -179,8 +181,8 @@ Q1g <- Q1[g] if(n0q0[g]) { for(k in seq_len(Qg)) { - delta[[g]][k] <- if(k > 1) .sim_deltak(alpha.d2=alpha.d2, beta.d2=beta.d2, delta.k=delta[[g]][k], tau.kq=tau[[g]][k:Qg], P=P, Q=Qg, - k=k, sum.term.kq=sum.terms[[g]][k:Qg], sigma=MGPsig[g]) else .sim_delta1(Q=Qg, P=P, tau=tau[[g]], sum.term=sum.terms[[g]], + delta[[g]][k] <- if(k > 1) .sim_deltak(alpha.d2=alpha.d2, beta.d2=beta.d2, delta.k=delta[[g]][k], tau.kq=tau[[g]][k:Qg], P.5=P.5, Q=Qg, + k=k, sum.term.kq=sum.terms[[g]][k:Qg], sigma=MGPsig[g]) else .sim_delta1(Q=Qg, P.5=P.5, tau=tau[[g]], sum.term=sum.terms[[g]], alpha.d1=ifelse(Q1g, alpha.d2, alpha.d1), beta.d1=ifelse(Q1g, beta.d2, beta.d1), delta.1=delta[[g]][1L], sigma=MGPsig[g]) tau[[g]] <- cumprod(delta[[g]]) } @@ -193,7 +195,7 @@ } if(cluster.shrink) { nGq0 <- sum(n0q0) - MGPsig[n0q0] <- .sim_sigma(G=nGq0, P=P, Qs=Qs[n0q0], rho1=rho1, rho2=rho2, sum.terms=sum.terms[n0q0], tau=tau[n0q0]) + MGPsig[n0q0] <- .sim_sigma(G=nGq0, P.5=P.5, Qs=Qs[n0q0], rho1=rho1, rho2=rho2, 
sum.terms=sum.terms[n0q0], tau=tau[n0q0]) MGPsig[!n0q0] <- .sim_sigma_p(G=G - nGq0, rho1=rho1, rho2=rho2) } } diff --git a/R/IMIFA.R b/R/IMIFA.R index ed50f47..fad5141 100644 --- a/R/IMIFA.R +++ b/R/IMIFA.R @@ -6,8 +6,8 @@ #' \itemize{ #' \item{Type: }{Package} #' \item{Package: }{IMIFA} -#' \item{Version: }{2.1.5} -#' \item{Date: }{2020-12-29 (this version), 2017-02-02 (original release)} +#' \item{Version: }{2.1.6} +#' \item{Date: }{2021-05-24 (this version), 2017-02-02 (original release)} #' \item{Licence: }{GPL (>=2)} #' } #' diff --git a/R/PlottingFunctions.R b/R/PlottingFunctions.R index 199bbfa..3c61057 100644 --- a/R/PlottingFunctions.R +++ b/R/PlottingFunctions.R @@ -209,7 +209,7 @@ plot.Results_IMIFA <- function(x, plot.meth = c("all", "correlation", "density" if(param == "uniquenesses") { mat <- switch(EXPR=uni.type, constrained=, unconstrained=mat, FALSE) } - mat <- ifelse(n.var == 1, FALSE, mat) + mat <- n.var != 1 && mat z.miss <- missing(zlabels) if(!z.miss) { if(all(!is.factor(zlabels), !is.logical(zlabels), !is.numeric(zlabels)) || @@ -257,7 +257,7 @@ plot.Results_IMIFA <- function(x, plot.meth = c("all", "correlation", "density" length(intervals) != 1)) stop("'intervals' must be a single logical indicator", call.=FALSE) if(any(!is.logical(mat), length(mat) != 1)) stop("'mat' must be a single logical indicator", call.=FALSE) - common <- ifelse(missing(common) && all(grp.ind, !all.ind, m.sw["M.sw"], param == "scores", heat.map), FALSE, ifelse(grp.ind, common, TRUE)) + common <- !(missing(common) && all(grp.ind, !all.ind, m.sw["M.sw"], param == "scores", heat.map)) && (!grp.ind || common) if(any(!is.logical(common), length(common) != 1)) stop("'common' must be a single logical indicator", call.=FALSE) if(any(!is.logical(partial), @@ -476,7 +476,7 @@ plot.Results_IMIFA <- function(x, plot.meth = c("all", "correlation", "density" } } else { base::plot(x=iter, y=x.plot[ind[1L],ind[2L],], type="l", ylab="", xlab="Iteration") - if(titles) 
graphics::title(main=list(paste0("Trace", ifelse(all.ind, ":\n", paste0(":\nLoadings - ", ifelse(grp.ind, paste0("Cluster ", g, " - "), ""))), var.names[ind[1L]], " Variable, Factor ", ind[2L]))) + if(titles) graphics::title(main=list(paste0("Trace", ifelse(all.ind, ":\n", paste0(":\nLoadings - ", ifelse(grp.ind, paste0("Cluster ", g, " - "), ""))), var.names[ind[1L]], " Variable, Factor ", ind[2L]))) } } @@ -576,7 +576,7 @@ plot.Results_IMIFA <- function(x, plot.meth = c("all", "correlation", "density" } else { plot.d <- tryCatch(stats::density(x.plot[ind[1L],ind[2L],], bw="SJ"), error = function(e) stats::density(x.plot[ind[1L],ind[2L],])) base::plot(plot.d, main="", ylab="", xlab="") - if(titles) graphics::title(main=list(paste0("Density", ifelse(all.ind, ":\n", ":\nScores - "), "Observation ", obs.names[ind[1L]], ", Factor ", ind[2L]))) + if(titles) graphics::title(main=list(paste0("Density", ifelse(all.ind, ":\n", ":\nScores - "), "Observation ", obs.names[ind[1L]], ", Factor ", ind[2L]))) graphics::polygon(plot.d, col=grey, border=NA) } } @@ -604,7 +604,7 @@ plot.Results_IMIFA <- function(x, plot.meth = c("all", "correlation", "density" } else { plot.d <- tryCatch(stats::density(x.plot[ind[1L],ind[2L],], bw="SJ"), error = function(e) stats::density(x.plot[ind[1L],ind[2L],])) base::plot(plot.d, main="", ylab="", xlab="") - if(titles) graphics::title(main=list(paste0("Density", ifelse(all.ind, ":\n", paste0(":\nLoadings - ", ifelse(grp.ind, paste0("Cluster ", g, " - "), ""))), var.names[ind[1L]], " Variable, Factor ", ind[2L]))) + if(titles) graphics::title(main=list(paste0("Density", ifelse(all.ind, ":\n", paste0(":\nLoadings - ", ifelse(grp.ind, paste0("Cluster ", g, " - "), ""))), var.names[ind[1L]], " Variable, Factor ", ind[2L]))) graphics::polygon(plot.d, col=grey, border=NA) } } @@ -1718,7 +1718,7 @@ plot.Results_IMIFA <- function(x, plot.meth = c("all", "correlation", "density" #' #' #' # Other special cases of the PYP are also facilitated #' # 
G_priorDensity(N=50, alpha=c(alpha, 27.1401, 0), -#' # discount=c(discount, -27.1401/100, 0.8054447), type="b") +#' # discount=c(discount, -27.1401/100, 0.8054448), type="b") G_priorDensity <- function(N, alpha, discount = 0, show.plot = TRUE, type = "h") { igmp <- isNamespaceLoaded("Rmpfr") mpfrind <- suppressMessages(requireNamespace("Rmpfr", quietly=TRUE)) && .version_above("gmp", "0.5-4") @@ -1762,6 +1762,8 @@ plot.Results_IMIFA <- function(x, plot.meth = c("all", "correlation", "density" if(any(discount < 0 & (alpha <= 0 | !.IntMult(alpha, discount)))) stop("'alpha' must be a positive integer multiple of 'abs(discount)' when 'discount' is negative", call.=FALSE) + if(any(alpha == 0 & + discount <= 0)) stop("'discount' must be strictly positive when 'alpha'=0", call.=FALSE) if(length(alpha) != max.len) { alpha <- rep(alpha, max.len) } @@ -1780,16 +1782,16 @@ plot.Results_IMIFA <- function(x, plot.meth = c("all", "correlation", "density" rx[,i] <- gmp::asNumeric(abs(vnk * Rmpfr::.bigz2mpfr(gmp::Stirling1.all(N)))) } else { if(disci > 0) { - vnk <- c(Rmpfr::mpfr(0, precBits=256), cumsum(log(alphi + Nseq[-N] * disci))) - + vnk <- c(Rmpfr::mpfr(0, precBits=256), cumsum(log(alphi + Nseq[-N] * disci))) - log(Rmpfr::pochMpfr(alphi + 1, N - 1L)) - Nsq2 * log(disci) } else { m <- as.integer(alphi/abs(disci)) mn <- min(m, N) seqN <- seq_len(mn - 1L) - vnk <- c(c(Rmpfr::mpfr(0, precBits=256), cumsum(log(m - seqN)) + seqN * log(abs(disci))) - - log(Rmpfr::pochMpfr(alphi + 1, N - 1L)) - c(seqN, mn) * log(abs(disci)), rep(-Inf, N - mn)) + vnk <- c(c(Rmpfr::mpfr(0, precBits=256), cumsum(log(m - seqN)) + seqN * log(abs(disci))) - + log(Rmpfr::pochMpfr(alphi + 1, N - 1L)) - c(seqN, mn) * log(abs(disci)), rep(-Inf, N - mn)) } - lnkd <- lapply(Nseq, function(g) Rmpfr::sumBinomMpfr(g, f=function(k) Rmpfr::pochMpfr(-k * disci, N))) + lnkd <- lapply(Nseq, function(g) Rmpfr::sumBinomMpfr(g, f=function(k) Rmpfr::pochMpfr(-k * disci, N), n0=1)) rx[,i] <- gmp::asNumeric(exp(vnk - 
lfactorial(Nsq2)) * abs(Rmpfr::mpfr2array(unlist(lnkd), dim=N))) } } diff --git a/inst/NEWS.md b/inst/NEWS.md index b5a178c..500d416 100644 --- a/inst/NEWS.md +++ b/inst/NEWS.md @@ -1,9 +1,14 @@ __Infinite Mixtures of Infinite Factor Analysers__ ================================================== +## IMIFA v2.1.6 - (_13th release [patch update]: 2021-05-24_) ### Bug Fixes & Miscellaneous Edits * Fixed breaking bugs associated with IM(I)FA slice samplers introduced in previous update. * `G_calibrate` function exported to augment existing `G_expected` & `G_variance` functions. +* `G_variance` now computed more accurately and efficiently for the `alpha=0` case. +* Major speed-up to `G_expected` for the `alpha=0` case. +* Minor speed-ups to simulation of local/column/cluster shrinkage parameters + scores & loadings. +* Minor speed-up to `G_priorDensity` for non-zero `discount`. * Minor speed-up to `psi_hyper`. ## IMIFA v2.1.5 - (_12th release [patch update]: 2020-12-29_) diff --git a/man/G_moments.Rd b/man/G_moments.Rd index fa3cced..1a8bb31 100644 --- a/man/G_moments.Rd +++ b/man/G_moments.Rd @@ -19,8 +19,9 @@ G_variance(N, G_calibrate(N, EG, - discount = 0, alpha = NULL, + discount = 0, + MPFR = TRUE, ...) } \arguments{ @@ -30,9 +31,9 @@ G_calibrate(N, \item{discount}{The discount parameter for the Pitman-Yor process. Must be less than 1, but typically lies in the interval [0, 1). Defaults to 0 (i.e. the Dirichlet process). When \code{discount} is negative \code{alpha} must be a positive integer multiple of \code{abs(discount)}. See \strong{Details} for behaviour for \code{G_calibrate}.} -\item{MPFR}{Logical indicating whether the high-precision libraries \code{\link[Rmpfr]{Rmpfr}} and \code{gmp} are invoked, at the expense of run-time. Defaults to \code{TRUE} and \strong{must} be \code{TRUE} for \code{\link{G_expected}} when \code{alpha=0} and \code{\link{G_variance}} when \code{discount} is non-zero. 
See \strong{\code{Note}}.} +\item{MPFR}{Logical indicating whether the high-precision libraries \code{\link[Rmpfr]{Rmpfr}} and \code{gmp} are invoked, at the expense of run-time. Defaults to \code{TRUE} and \strong{must} be \code{TRUE} for \code{G_expected} when \code{alpha=0} or \code{G_variance} when \code{discount} is non-zero. For \code{G_calibrate}, it is \emph{strongly recommended} to use \code{MPFR=TRUE} when \code{discount} is non-zero and strictly necessary when \code{alpha=0} is supplied. See \strong{\code{Note}}.} -\item{EG}{The prior expected number of clusters. Must exceed \code{1}.} +\item{EG}{The prior expected number of clusters. Must exceed \code{1} and be less than \code{N}.} \item{...}{Additional arguments passed to \code{\link[stats]{uniroot}}, e.g. \code{maxiter}.} } @@ -45,10 +46,10 @@ Calculate the \emph{a priori} expected number of clusters (\code{G_expected}) or \details{ All arguments are vectorised. Users can also consult \code{\link{G_priorDensity}} in order to solicit sensible priors. -For \code{G_calibrate}, \strong{only one} of \code{alpha} or \code{discount} can be supplied, and the function elicits a value for the opposing parameter which achieves the desired expected number of clusters \code{EG} for the given sample size \code{N}. By default, a value for \code{alpha} subject to \code{discount=0} (i.e. the Dirichlet process) is elicited. See \strong{Examples} below. +For \code{G_calibrate}, \strong{only one} of \code{alpha} or \code{discount} can be supplied, and the function elicits a value for the opposing parameter which achieves the desired expected number of clusters \code{EG} for the given sample size \code{N}. By default, a value for \code{alpha} subject to \code{discount=0} (i.e. the Dirichlet process) is elicited. Note that \code{alpha} may not be a positive integer multiple of \code{discount} as it should be if \code{discount} is negative. See \strong{Examples} below. 
} \note{ -\code{G_variance} requires use of the \code{\link[Rmpfr]{Rmpfr}} and \code{gmp} libraries for non-zero \code{discount} values. \code{G_expected} requires these libraries only for the \code{alpha=0} case. Despite the high precision arithmetic used, the functions can still be unstable for small values of \code{discount}. See the argument \code{MPFR}. +\code{G_variance} requires use of the \code{\link[Rmpfr]{Rmpfr}} and \code{gmp} libraries for non-zero \code{discount} values. \code{G_expected} requires these libraries only for the \code{alpha=0} case. These libraries are \emph{strongly recommended} (but they are not required) for \code{G_calibrate} when \code{discount} is non-zero, but they are required when \code{alpha=0} is supplied. Despite the high precision arithmetic used, the functions can still be unstable for large \code{N} and/or extreme values of \code{alpha} and/or \code{discount}. See the argument \code{MPFR}. } \examples{ G_expected(N=50, alpha=19.23356, MPFR=FALSE) @@ -69,18 +70,23 @@ matplot(DP, type="l", xlab="N", ylab="G") # matplot(PY, type="l", xlab="N", ylab="G") # Other special cases of the PYP are also facilitated -# G_expected(N=50, alpha=c(27.1401, 0), discount=c(-27.1401/100, 0.8054447)) -# G_variance(N=50, alpha=c(27.1401, 0), discount=c(-27.1401/100, 0.8054447)) +# G_expected(N=50, alpha=c(27.1401, 0), discount=c(-27.1401/100, 0.8054448)) +# G_variance(N=50, alpha=c(27.1401, 0), discount=c(-27.1401/100, 0.8054448)) -# Elicit values for alpha +# Elicit values for alpha under a DP prior G_calibrate(N=50, EG=25) -G_calibrate(N=50, EG=25, discount=c(0.25, 0.7300045)) -# Elicit values for discount -G_calibrate(N=50, EG=25, alpha=c(12.21619, 1)) +# Elicit values for alpha under a PYP prior +# require("Rmpfr") +# G_calibrate(N=50, EG=25, discount=c(-27.1401/100, 0.25, 0.7300045)) + +# Elicit values for discount under a PYP prior +# G_calibrate(N=50, EG=25, alpha=c(12.21619, 1, 0), maxiter=2000) } \references{ De Blasi, P., Favaro, S., 
Lijoi, A., Mena, R. H., Prunster, I., and Ruggiero, M. (2015) Are Gibbs-type priors the most natural generalization of the Dirichlet process?, \emph{IEEE Transactions on Pattern Analysis and Machine Intelligence}, 37(2): 212-229. + +Yamato, H. and Shibuya, M. (2000) Moments of some statistics of Pitman sampling formula, \emph{Bulletin of Informatics and Cybernetics}, 32(1): 1-10. } \seealso{ \code{\link{G_priorDensity}}, \code{\link[Rmpfr]{Rmpfr}}, \code{\link[stats]{uniroot}} diff --git a/man/G_priorDensity.Rd b/man/G_priorDensity.Rd index b24626c..c33ff82 100644 --- a/man/G_priorDensity.Rd +++ b/man/G_priorDensity.Rd @@ -52,7 +52,7 @@ Requires use of the \code{\link[Rmpfr]{Rmpfr}} and \code{gmp} libraries; may enc #' # Other special cases of the PYP are also facilitated # G_priorDensity(N=50, alpha=c(alpha, 27.1401, 0), -# discount=c(discount, -27.1401/100, 0.8054447), type="b") +# discount=c(discount, -27.1401/100, 0.8054448), type="b") } \references{ De Blasi, P., Favaro, S., Lijoi, A., Mena, R. H., Prunster, I., and Ruggiero, M. (2015) Are Gibbs-type priors the most natural generalization of the Dirichlet process?, \emph{IEEE Transactions on Pattern Analysis and Machine Intelligence}, 37(2): 212-229. 
diff --git a/man/IMIFA-package.Rd b/man/IMIFA-package.Rd index e417688..9a75123 100644 --- a/man/IMIFA-package.Rd +++ b/man/IMIFA-package.Rd @@ -13,8 +13,8 @@ A package for Bayesian nonparameteric clustering of high-dimensional data sets, \itemize{ \item{Type: }{Package} \item{Package: }{IMIFA} -\item{Version: }{2.1.5} -\item{Date: }{2020-12-29 (this version), 2017-02-02 (original release)} +\item{Version: }{2.1.6} +\item{Date: }{2021-05-24 (this version), 2017-02-02 (original release)} \item{Licence: }{GPL (>=2)} } } diff --git a/vignettes/IMIFA.Rmd b/vignettes/IMIFA.Rmd index ea895a2..02c9950 100644 --- a/vignettes/IMIFA.Rmd +++ b/vignettes/IMIFA.Rmd @@ -35,7 +35,7 @@ If you find bugs or want to suggest new features please visit the __IMIFA__ [Git This vignette aims to reproduce some results in the Murphy et al. (2020) paper using the `mcmc_IMIFA()` and `get_IMIFA_results()` functions and demonstrates how the plots therein were created using the dedicated S3 `plot` method, while also demonstrating how to fit other models in the IMIFA family. ### Installing IMIFA -__IMIFA__ will run in Windows, Mac OS X, or Linux. To install __IMIFA__ you first need to install [R](https://cran.r-project.org/). Installing [RStudio](https://rstudio.com/) as a nice desktop environment for using R is also recommended. +__IMIFA__ will run in Windows, Mac OS X, or Linux. To install __IMIFA__ you first need to install [R](https://cran.r-project.org/). Installing [RStudio](https://www.rstudio.com/) as a nice desktop environment for using R is also recommended. Once in R you can type: