diff --git a/.github/workflows/check_for_cran.yaml b/.github/workflows/check_for_cran.yaml index f978317ef..e88825fb4 100644 --- a/.github/workflows/check_for_cran.yaml +++ b/.github/workflows/check_for_cran.yaml @@ -13,10 +13,9 @@ jobs: fail-fast: true matrix: config: - - { R: "devel", os: "ubuntu-20.04"} + - { R: "devel", os: "ubuntu-latest"} - { R: "release", os: "macos-latest"} - - { R: "release", os: "windows-latest"} - - { R: "release", os: "ubuntu-20.04"} + - { R: "release", os: "ubuntu-latest"} runs-on: ${{ matrix.config.os }} diff --git a/DESCRIPTION b/DESCRIPTION index 5faf894c9..dcc3ab583 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,13 +1,18 @@ Package: rbmi Title: Reference Based Multiple Imputation -Version: 1.2.3 +Version: 1.2.4 Authors@R: c( person("Craig", "Gower-Page", email = "craig.gower-page@roche.com", role = c("aut", "cre")), person("Alessandro", "Noci", email = "alessandro.noci@roche.com", role = c("aut")), person("Marcel", "Wolbers", email = "marcel.wolbers@roche.com", role = "ctb"), person("Roche", role = c("cph", "fnd")) ) -Description: Implements reference based multiple imputation allowing for the imputation of longitudinal datasets using predefined strategies. +Description: Implements standard and reference based multiple imputation methods for continuous + longitudinal endpoints (Gower-Page et al. (2022) ). In particular, rbmi + supports deterministic conditional mean imputation and jackknifing as described in Wolbers et al. + (2022) , Bayesian multiple imputation as described in Carpenter et al. (2013) + , and bootstrapped maximum likelihood imputation as described in + von Hippel and Bartlett (2021) . 
Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) diff --git a/NEWS.md b/NEWS.md index 6c1d93e5a..349809b86 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,9 @@ -# rbmi (development version) +# rbmi 1.2.4 + +* Updated internal Stan code to ensure future compatibility (@andrjohns, #390) +* Updated package description to include relevant references (#393) +* Fixed documentation typos (#393) -* Updated internal Stan code to ensure future compatibility (Thank you @andrjohns) # rbmi 1.2.3 diff --git a/R/draws.R b/R/draws.R index 9a38a19e3..ae0889994 100644 --- a/R/draws.R +++ b/R/draws.R @@ -275,7 +275,7 @@ draws.bmlmi <- function(data, data_ice = NULL, vars, method, ncores = 1, quiet = #' the subject ids from the original dataset are returned. These values are used to tell [impute()] #' what subjects should be used to derive the imputed dataset. #' @param failure_limit Number of failed samples that are allowed before throwing an error -#' @param ncores Number of processes to parallise the job over +#' @param ncores Number of processes to parallelise the job over #' @param quiet Logical, If `TRUE` will suppress printing of progress information that is printed to #' the console. #' @@ -695,10 +695,10 @@ validate.draws <- function(x, ...) { #' #' @description #' -#' Object is initalised with total number of iterations that are expected to occour. +#' Object is initialised with total number of iterations that are expected to occur. #' User can then update the object with the `add` method to indicate how many more iterations -#' have just occoured. -#' Every time `step` * 100 % of iterations have occured a message is printed to the console. +#' have just occurred. +#' Every time `step` * 100 % of iterations have occurred a message is printed to the console. 
#' Use the `quiet` argument to prevent the object from printing anything at all #' #' @import R6 @@ -739,7 +739,7 @@ progressLogger <- R6::R6Class( #' this will add that number to the current step count (`step_current`) and will #' print a progress message to the log if the step limit (`step`) has been reached. #' This function will do nothing if `quiet` has been set to `TRUE` - #' @param n the number of sucessfully complete iterations since `add()` was last called + #' @param n the number of successfully complete iterations since `add()` was last called add = function(n) { if (self$quiet) { return(invisible()) diff --git a/R/longData.R b/R/longData.R index b484a9a8e..e2b001c39 100644 --- a/R/longData.R +++ b/R/longData.R @@ -78,7 +78,7 @@ longDataConstructor <- R6::R6Class( #' This list is defaulted to TRUE for all subjects & outcomes and is then #' modified by calls to `self$set_strategies()`. #' Note that this does not indicate which values are missing, this variable - #' is True for outcome values that either occoured before the ICE visit + #' is True for outcome values that either occurred before the ICE visit #' or are post the ICE visit and have an imputation strategy of MAR is_mar = list(), @@ -475,7 +475,7 @@ longDataConstructor <- R6::R6Class( assert_that( identical(names(x), self$visits), msg = paste( - "An unexpected error has occoured in check_has_data_at_each_visit()", + "An unexpected error has occurred in check_has_data_at_each_visit()", "please report this to the developer" ) ) diff --git a/R/lsmeans.R b/R/lsmeans.R index f4dd226f8..622686c25 100644 --- a/R/lsmeans.R +++ b/R/lsmeans.R @@ -14,11 +14,11 @@ #' categorical covariate and by setting any numerical covariates equal #' to the mean. #' -#' A final lsmean value is calculating by averaging these hypothetical +#' A final lsmean value is calculated by averaging these hypothetical #' patients. 
If `.weights` equals `"proportional"` then the values are weighted -#' by the frequency in which they occour in the full dataset. If `.weights` +#' by the frequency in which they occur in the full dataset. If `.weights` #' equals `"equal"` then each hypothetical patient is given an equal weight -#' regardless of what actually occours in the dataset. +#' regardless of what actually occurs in the dataset. #' #' Use the `...` argument to fix specific variables to specific values. #' @@ -83,7 +83,7 @@ lsmeans <- function(model, ..., .weights = c("proportional", "equal")) { #' and standard error. `ls_design_equal` calculates it by #' applying an equal weight per covariate combination whilst #' `ls_design_proportional` applies weighting proportional -#' to the frequency in which the covariate combination occoured +#' to the frequency in which the covariate combination occurred #' in the actual dataset. #' #' @param data A data.frame diff --git a/R/mmrm.R b/R/mmrm.R index 3bbfe703d..d3e9865f9 100644 --- a/R/mmrm.R +++ b/R/mmrm.R @@ -162,7 +162,7 @@ extract_params <- function(fit) { #' @param outcome a numeric vector. The outcome value to be regressed on in the MMRM model. #' @param subjid a character / factor vector. The subject identifier used to link separate visits #' that belong to the same subject. -#' @param visit a character / factor vector. Indicates which visit the outcome value occoured on. +#' @param visit a character / factor vector. Indicates which visit the outcome value occurred on. #' @param group a character / factor vector. Indicates which treatment group the patient belongs to. #' @param cov_struct a character value. Specifies which covariance structure to use. Must be one of #' `"us"`, `"toep"`, `"cs"` or `"ar1"` @@ -237,7 +237,7 @@ fit_mmrm <- function(designmat, #' This function was originally developed for use with glmmTMB which needed #' more hand-holding and dropping of false-positive warnings. 
It is not #' as important now but is kept around encase we need to catch -#' false-postive warnings again in the future. +#' false-positive warnings again in the future. #' #' @examples #' \dontrun{ diff --git a/R/utilities.R b/R/utilities.R index 44633a1a7..b06e2ed79 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -517,7 +517,7 @@ as_dataframe <- function(x) { #' Do not run this function #' -#' This function only exists to supress the false positive +#' This function only exists to suppress the false positive #' from R CMD Check about unused libraries #' #' Both rstantools and RcppParallel are required but are only used at diff --git a/README.md b/README.md index 92d73a7fe..25fb73899 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ -[![R-CMD-check](https://github.com/insightsengineering/rbmi/workflows/R-CMD-check/badge.svg)](https://github.com/insightsengineering/rbmi/actions) +[![CRAN +status](https://www.r-pkg.org/badges/version/rbmi)](https://cran.r-project.org/package=rbmi) +[![R-CMD-check](https://github.com/insightsengineering/rbmi/actions/workflows/on_push.yaml/badge.svg?branch=main)](https://github.com/insightsengineering/rbmi/actions/workflows/on_push.yaml) diff --git a/cran-comments.md b/cran-comments.md index 42a58c305..01013371c 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,12 +1,10 @@ ## Summary of Submission -This is a re-submission to ensure that our unit tests do not fail on CRANs servers. -The original uploads notes are as follows: - In this version I have: -* Replaced our dependencies from glmmTMB to mmrm to improve package performance and stability -* Upgraded our use of parallel processes to be more reliable in testing environments +* Updated our Stan syntax to ensure future compatibility +* Updated our package description to contain relevant references +* Fixed several typos in our documentation ## R CMD check results @@ -15,9 +13,9 @@ There were no ERRORs or WARNINGs. 
There were 2 NOTEs: ❯ checking installed package size ... NOTE - installed size is 57.4Mb - sub-directories of 1Mb or more: - libs 56.0Mb + installed size is 55.6Mb + sub-directories of 1Mb or more: + libs 54.3Mb - This is a consequence of using Rstan which produces quite large binaries when compiled. As far as I'm aware there is no way for us to reduce this and is dependent on the Stan development team. Our understanding from the [developers](https://discourse.mc-stan.org/t/using-rstan-in-an-r-package-generates-r-cmd-check-notes/26628) is that this is acceptable to ignore. @@ -33,11 +31,11 @@ There were 2 NOTEs: The package was tested in the following environments: -- Ubuntu 20.04, R release (GitHub Actions) -- Windows latest, R release (Local Machine) -- Mac OS latest, R release (Local Machine + GitHub Actions) -- Ubuntu 20.04, R devel (GitHub Actions) - +- Ubuntu, R release (GitHub Actions) +- Windows, R release (Local Machine + Rhub + Win-Builder) +- MacOS, R release (Local Machine + GitHub Actions) +- Ubuntu, R devel (GitHub Actions) +- Fedora, R devel (Rhub) ## Downstream dependencies diff --git a/man/as_mmrm_df.Rd b/man/as_mmrm_df.Rd index 27580ac56..acad8fda6 100644 --- a/man/as_mmrm_df.Rd +++ b/man/as_mmrm_df.Rd @@ -13,7 +13,7 @@ any missing values} \item{outcome}{a numeric vector. The outcome value to be regressed on in the MMRM model.} -\item{visit}{a character / factor vector. Indicates which visit the outcome value occoured on.} +\item{visit}{a character / factor vector. Indicates which visit the outcome value occurred on.} \item{subjid}{a character / factor vector. 
The subject identifier used to link separate visits that belong to the same subject.} diff --git a/man/do_not_run.Rd b/man/do_not_run.Rd index 3d15d8c04..ede958cf9 100644 --- a/man/do_not_run.Rd +++ b/man/do_not_run.Rd @@ -7,7 +7,7 @@ do_not_run() } \description{ -This function only exists to supress the false positive +This function only exists to suppress the false positive from R CMD Check about unused libraries } \details{ diff --git a/man/eval_mmrm.Rd b/man/eval_mmrm.Rd index f8b1f1846..098912806 100644 --- a/man/eval_mmrm.Rd +++ b/man/eval_mmrm.Rd @@ -21,7 +21,7 @@ without the program exiting. This function was originally developed for use with glmmTMB which needed more hand-holding and dropping of false-positive warnings. It is not as important now but is kept around encase we need to catch -false-postive warnings again in the future. +false-positive warnings again in the future. } \examples{ \dontrun{ diff --git a/man/fit_mmrm.Rd b/man/fit_mmrm.Rd index 75644909c..c34997828 100644 --- a/man/fit_mmrm.Rd +++ b/man/fit_mmrm.Rd @@ -25,7 +25,7 @@ any missing values} \item{subjid}{a character / factor vector. The subject identifier used to link separate visits that belong to the same subject.} -\item{visit}{a character / factor vector. Indicates which visit the outcome value occoured on.} +\item{visit}{a character / factor vector. Indicates which visit the outcome value occurred on.} \item{group}{a character / factor vector. 
Indicates which treatment group the patient belongs to.} diff --git a/man/get_draws_mle.Rd b/man/get_draws_mle.Rd index 14a4d10c9..e236e2f5e 100644 --- a/man/get_draws_mle.Rd +++ b/man/get_draws_mle.Rd @@ -38,7 +38,7 @@ what subjects should be used to derive the imputed dataset.} \item{failure_limit}{Number of failed samples that are allowed before throwing an error} -\item{ncores}{Number of processes to parallise the job over} +\item{ncores}{Number of processes to parallelise the job over} \item{quiet}{Logical, If \code{TRUE} will suppress printing of progress information that is printed to the console.} diff --git a/man/longDataConstructor.Rd b/man/longDataConstructor.Rd index fb0d90898..a97c06d1a 100644 --- a/man/longDataConstructor.Rd +++ b/man/longDataConstructor.Rd @@ -55,7 +55,7 @@ if the subjects outcome values are MAR or not. This list is defaulted to TRUE for all subjects & outcomes and is then modified by calls to \code{self$set_strategies()}. Note that this does not indicate which values are missing, this variable -is True for outcome values that either occoured before the ICE visit +is True for outcome values that either occurred before the ICE visit or are post the ICE visit and have an imputation strategy of MAR} \item{\code{strategies}}{A list indexed by subject storing a single character diff --git a/man/ls_design.Rd b/man/ls_design.Rd index 4ad5072e6..6cdba4aab 100644 --- a/man/ls_design.Rd +++ b/man/ls_design.Rd @@ -25,6 +25,6 @@ Calculates the design vector as required to generate the lsmean and standard error. \code{ls_design_equal} calculates it by applying an equal weight per covariate combination whilst \code{ls_design_proportional} applies weighting proportional -to the frequency in which the covariate combination occoured +to the frequency in which the covariate combination occurred in the actual dataset. 
} diff --git a/man/lsmeans.Rd b/man/lsmeans.Rd index 9d4db1f16..24739d6d6 100644 --- a/man/lsmeans.Rd +++ b/man/lsmeans.Rd @@ -28,11 +28,11 @@ are constructed by expanding out all possible combinations of each categorical covariate and by setting any numerical covariates equal to the mean. -A final lsmean value is calculating by averaging these hypothetical +A final lsmean value is calculated by averaging these hypothetical patients. If \code{.weights} equals \code{"proportional"} then the values are weighted -by the frequency in which they occour in the full dataset. If \code{.weights} +by the frequency in which they occur in the full dataset. If \code{.weights} equals \code{"equal"} then each hypothetical patient is given an equal weight -regardless of what actually occours in the dataset. +regardless of what actually occurs in the dataset. Use the \code{...} argument to fix specific variables to specific values. diff --git a/man/progressLogger.Rd b/man/progressLogger.Rd index 584eefcc5..5bb9f5891 100644 --- a/man/progressLogger.Rd +++ b/man/progressLogger.Rd @@ -4,10 +4,10 @@ \alias{progressLogger} \title{R6 Class for printing current sampling progress} \description{ -Object is initalised with total number of iterations that are expected to occour. +Object is initialised with total number of iterations that are expected to occur. User can then update the object with the \code{add} method to indicate how many more iterations -have just occoured. -Every time \code{step} * 100 \% of iterations have occured a message is printed to the console. +have just occurred. +Every time \code{step} * 100 \% of iterations have occurred a message is printed to the console. Use the \code{quiet} argument to prevent the object from printing anything at all } \section{Public fields}{ @@ -73,7 +73,7 @@ This function will do nothing if \code{quiet} has been set to \code{TRUE} \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{n}}{the number of sucessfully complete iterations since \code{add()} was last called} +\item{\code{n}}{the number of successfully complete iterations since \code{add()} was last called} } \if{html}{\out{
}} } diff --git a/tests/testthat/test-draws.R b/tests/testthat/test-draws.R index 7c867a6e3..955236e3e 100644 --- a/tests/testthat/test-draws.R +++ b/tests/testthat/test-draws.R @@ -617,7 +617,7 @@ test_that("draws.bmlmi works as expected", { -test_that("quiet supress progress messages", { +test_that("quiet suppress progress messages", { bign <- 90 sigma <- as_vcov( diff --git a/tests/testthat/test-mcmc.R b/tests/testthat/test-mcmc.R index 1b3236dba..7fb72148f 100644 --- a/tests/testthat/test-mcmc.R +++ b/tests/testthat/test-mcmc.R @@ -81,7 +81,7 @@ test_that("split_dim creates a list from an array as expected", { -test_that("Verbose supression works", { +test_that("Verbose suppression works", { set.seed(301) sigma <- as_vcov(c(6, 4, 4), c(0.5, 0.2, 0.3)) diff --git a/vignettes/stat_specs.Rmd b/vignettes/stat_specs.Rmd index de84d95fb..161e663bb 100644 --- a/vignettes/stat_specs.Rmd +++ b/vignettes/stat_specs.Rmd @@ -454,7 +454,7 @@ Conditional mean imputation combined with the jackknife is the only method which Bayesian MI methods rely on the specification of prior distributions and the usage of Markov chain Monte Carlo (MCMC) methods. All other methods based on multiple imputation or bootstrapping require no other tuning parameters than the specification of the number of imputations $M$ or bootstrap samples $B$ and rely on numerical optimization for fitting the MMRM imputation models via REML. Conditional mean imputation combined with the jackknife has no tuning parameters. -In our `rbmi` implementation, the fitting of the MMRM imputation model via REML is computationally most expensive. MCMC sampling using `rstan` (@Rstan) is typically relatively fast in our setting and requires only a small burn-in and burn-between of the chains. In addition, the number of random imputations for reliable inference using Rubin's rules is often smaller than the number of resamples required for the jackknife or the bootstrap (see e.g. 
the discussions in @White2011multiple[Section 7] for Bayesian MI and the Appendix of @Wolbers2021 for the bootstrap). Thus, for many applications, we expect that conventional MI based on Bayesian posterior draws will be fastest, followed by conventional MI using approximate Bayesian posterior draws and conditional mean imputation combined with the jackknife. Conditional mean imputation combined with the bootstrap and bootstrapped MI methods will typically be most computationally demanding. Of note, all implemented methods are conceptually straightforward to parallelize and some parallelization support is provided by `rbmi`. +In our `rbmi` implementation, the fitting of the MMRM imputation model via REML is computationally most expensive. MCMC sampling using `rstan` (@Rstan) is typically relatively fast in our setting and requires only a small burn-in and burn-between of the chains. In addition, the number of random imputations for reliable inference using Rubin's rules is often smaller than the number of resamples required for the jackknife or the bootstrap (see e.g. the discussions in @White2011multiple[Section 7] for Bayesian MI and the Appendix of @Wolbers2021 for the bootstrap). Thus, for many applications, we expect that conventional MI based on Bayesian posterior draws will be fastest, followed by conventional MI using approximate Bayesian posterior draws and conditional mean imputation combined with the jackknife. Conditional mean imputation combined with the bootstrap and bootstrapped MI methods will typically be most computationally demanding. Of note, all implemented methods are conceptually straightforward to parallelise and some parallelisation support is provided by `rbmi`. # Mapping of statistical methods to `rbmi` functions {#sec:rbmiFunctions} diff --git a/vignettes/stat_specs.html b/vignettes/stat_specs.html index 4fd44bc07..b5dc78b2d 100644 --- a/vignettes/stat_specs.html +++ b/vignettes/stat_specs.html @@ -706,7 +706,7 @@

3.10.2 Standard errors of the tre

3.10.3 Computational complexity

Bayesian MI methods rely on the specification of prior distributions and the usage of Markov chain Monte Carlo (MCMC) methods. All other methods based on multiple imputation or bootstrapping require no other tuning parameters than the specification of the number of imputations \(M\) or bootstrap samples \(B\) and rely on numerical optimization for fitting the MMRM imputation models via REML. Conditional mean imputation combined with the jackknife has no tuning parameters.

-

In our rbmi implementation, the fitting of the MMRM imputation model via REML is computationally most expensive. MCMC sampling using rstan (Stan Development Team (2020)) is typically relatively fast in our setting and requires only a small burn-in and burn-between of the chains. In addition, the number of random imputations for reliable inference using Rubin’s rules is often smaller than the number of resamples required for the jackknife or the bootstrap (see e.g. the discussions in I. R. White, Royston, and Wood (2011, sec. 7) for Bayesian MI and the Appendix of Wolbers et al. (2022) for the bootstrap). Thus, for many applications, we expect that conventional MI based on Bayesian posterior draws will be fastest, followed by conventional MI using approximate Bayesian posterior draws and conditional mean imputation combined with the jackknife. Conditional mean imputation combined with the bootstrap and bootstrapped MI methods will typically be most computationally demanding. Of note, all implemented methods are conceptually straightforward to parallelize and some parallelization support is provided by rbmi.

+

In our rbmi implementation, the fitting of the MMRM imputation model via REML is computationally most expensive. MCMC sampling using rstan (Stan Development Team (2020)) is typically relatively fast in our setting and requires only a small burn-in and burn-between of the chains. In addition, the number of random imputations for reliable inference using Rubin’s rules is often smaller than the number of resamples required for the jackknife or the bootstrap (see e.g. the discussions in I. R. White, Royston, and Wood (2011, sec. 7) for Bayesian MI and the Appendix of Wolbers et al. (2022) for the bootstrap). Thus, for many applications, we expect that conventional MI based on Bayesian posterior draws will be fastest, followed by conventional MI using approximate Bayesian posterior draws and conditional mean imputation combined with the jackknife. Conditional mean imputation combined with the bootstrap and bootstrapped MI methods will typically be most computationally demanding. Of note, all implemented methods are conceptually straightforward to parallelise and some parallelisation support is provided by rbmi.