diff --git a/R/distributions.R b/R/distributions.R index ab5e3d23..63bd783a 100644 --- a/R/distributions.R +++ b/R/distributions.R @@ -407,6 +407,10 @@ DistrParamChar = function(name, instance_name, trans = DistrParamTrans()) { } DistrParamCharFit = function(name, instance_name, trans = DistrParamTrans()) { self = DistrParamChar(name, instance_name, trans) + # Need to update default value with trans(default val) + # self$default = function() { + # list(self$trans$ref(self$check_in_spec())) |> setNames(self$instance_name) + # } self$distr_params_frame = function() { self$check_in_spec() mat = self$global_name @@ -416,7 +420,7 @@ DistrParamCharFit = function(name, instance_name, trans = DistrParamTrans()) { , default = self$model_spec$default[[mat]] ) } - return_object(self, "DistrParamCharNoFit") + return_object(self, "DistrParamCharFit") } DistrParamCharNoFit = function(name, instance_name, trans = DistrParamTrans()) { self = DistrParamChar(name, instance_name, trans) @@ -515,23 +519,31 @@ TESTDISTR = function(location, sd) { #' Distributions which can be used to specify prior or likelihood components in #' model calibration. #' -#' @param location Location parameter. Only necessary if used as a prior -#' distribution. If it is used as a likelihood component the location -#' parameter will be taken as the simulated variable being fitted to data, -#' and so this `location` parameter should be left to the default. +#' @param location Location parameter. +#' Specifying the `location` parameter is only necessary when the distribution +#' is used as a prior distribution. If it is used as a likelihood component the +#' location parameter will be taken as the simulated variable being fitted to +#' data, and so this `location` parameter should be left to the default. #' @param sd Standard deviation parameter. -#' @param disp Dispersion parameter. +#' @param disp Dispersion parameter. 
+#' @param default_trans Named list of default transformations for each +#' distributional parameter. See `?transform_distr_param` for a list of +#' available transformations. #' +#' @details All distributional parameter arguments can be specified either as +#' a numeric value, a character string giving the parameter name, or a +#' distributional parameter object (See ?fit_distr_params). #' @name distribution NULL -#' @description * Uniform Distribution (Improper), only appropriate for prior components - `mp_uniform` +#' @description * Uniform Distribution (Improper), only appropriate for prior +#' components - `mp_uniform` #' @name distribution #' @export -mp_uniform = function() { +mp_uniform = function(default_trans = list()) { self = DistrSpec( distr_param_objs = nlist() - , default_trans = list() + , default_trans = default_trans ) self$prior = \(par) { "-0" @@ -552,10 +564,12 @@ mp_normal_error = function(sd) { #' @description * Normal Distribution - `mp_normal` #' @name distribution #' @export -mp_normal = function(location = DistrParam("location"), sd) { +mp_normal = function(location = DistrParam("location") + , sd + , default_trans = list(location = mp_identity, sd = mp_log)) { self = DistrSpec( distr_param_objs = nlist(location, sd) - , default_trans = list(location = mp_identity, sd = mp_log) + , default_trans = default_trans ) self$prior = \(par) { sprintf("-sum(dnorm(%s, %s, %s))" @@ -577,12 +591,14 @@ mp_normal = function(location = DistrParam("location"), sd) { #' @description * Log-Normal Distribution - `mp_log_normal` #' @name distribution #' @export -mp_log_normal = function(location = DistrParam("location"), sd) { +mp_log_normal = function(location = DistrParam("location") + , sd + , default_trans = list(location = mp_identity, sd = mp_identity)) { self = DistrSpec( distr_param_objs = nlist(location, sd) # identity transformations because distributional parameters are already # specified on the log scale? 
- , default_trans = list(location = mp_identity, sd = mp_identity) + , default_trans = default_trans ) self$prior = \(par) { @@ -613,10 +629,11 @@ mp_log_normal = function(location = DistrParam("location"), sd) { #' @description * Poisson Distribution - `mp_poisson` #' @name distribution #' @export -mp_poisson = function(location = DistrParam("location")) { +mp_poisson = function(location = DistrParam("location") + , default_trans = list(location = mp_identity)) { self = DistrSpec( distr_param_objs = nlist(location)# should this be named lambda - , default_trans = list(location = mp_identity) + , default_trans = default_trans ) self$prior = \(par) { sprintf("-sum(dpois(%s, %s))" @@ -635,10 +652,12 @@ mp_poisson = function(location = DistrParam("location")) { #' @description * Negative Binomial Distribution - `mp_neg_bin` #' @name distribution #' @export -mp_neg_bin = function(location = DistrParam("location"), disp) { +mp_neg_bin = function(location = DistrParam("location") + , disp + , default_trans = list(location = mp_identity, disp = mp_log)) { self = DistrSpec( distr_param_objs = nlist(location, disp) - , default_trans = list(location = mp_identity, disp = mp_log) + , default_trans = default_trans ) self$prior = \(par) { sprintf("-sum(dnbinom(%s, clamp(%s), %s))" @@ -673,6 +692,56 @@ mp_neg_bin = function(location = DistrParam("location"), disp) { #' `sd` in the \code{\link{mp_normal}} distributions has a default log #' transformation specified using \code{\link{mp_log}}. #' +#' @return A distributional parameter object. +#' @examples +#' +#' # First we call the SIR model spec, and generate some data for calibration. +#' spec = mp_tmb_library("starter_models", "sir", package = "macpan2") +#' data = mp_simulator(spec, 50, "infection") |> mp_trajectory() +#' +#' # Suppose we want to specify a Normal prior on the transmission parameter +#' # beta, and we are interested in estimating the prior standard deviation. 
+#' # Here we use `mp_fit` to estimate the standard deviation, `sd`, and we +#' # provide a numeric starting value for `sd` in the optimization. +#' cal = mp_tmb_calibrator( +#' spec +#' , data +#' , traj = "infection" +#' , par = list(beta = mp_normal(location = 0.35, sd = mp_fit(0.1))) +#' , default = list(beta = 0.25) +#' ) +#' +#' # When viewing the calibration objective function we can see the additional +#' # prior density term added for beta. The standard deviation parameter has +#' # been automatically named 'distr_params_log_sd_beta'. +#' cal$simulator$tmb_model$obj_fn$obj_fn_expr +#' +#' # Next we optimize and view the fitted parameters. We can see the +#' # distributional parameter in the coefficient table with a default value +#' # equal to the numeric value we provided to `mp_fit` above. +#' mp_optimize(cal) +#' mp_tmb_coef(cal) +#' +#' # If instead we want control over the name of the new fitted distributional +#' # parameter, we can add a new variable to our model specification with the +#' # default value set to the desired optimization starting value. +#' updated_spec = spec |> mp_tmb_insert(default = list(sd_var = 0.1)) +#' +#' # In the calibrator, we use the name of this newly added variable, "sd_var", +#' # as input to `mp_fit`. +#' cal = mp_tmb_calibrator( +#' updated_spec +#' , data +#' , traj = "infection" +#' , par = list(beta = mp_normal(location = 0.35, sd = mp_fit("sd_var"))) +#' , default = list(beta = 0.25) +#' ) +#' +#' # We can see this distributional parameter get propagated to the objective +#' # function and the fitted parameter table. 
+#' cal$simulator$tmb_model$obj_fn$obj_fn_expr +#' mp_optimize(cal) +#' mp_tmb_coef(cal) #' @name fit_distr_params #' @export mp_fit = function(x, trans = DistrParamTransDefault()) UseMethod("mp_fit") diff --git a/inst/starter_models/shiver/README.Rmd b/inst/starter_models/shiver/README.Rmd index a1580c00..79c885f1 100644 --- a/inst/starter_models/shiver/README.Rmd +++ b/inst/starter_models/shiver/README.Rmd @@ -214,7 +214,7 @@ missed_reports = 10 actual_daily_reports = expected_daily_reports - missed_reports ``` -Daily COVID19 hospitalization data for Ontario was obtained from the [Ontario Data Catalogue](https://data.ontario.ca/dataset/covid-19-vaccine-data-in-ontario/resource/274b819c-5d69-4539-a4db-f2950794138c). The data contains daily hospitalization counts stratitfied by vaccination status and severity of hospitalization care. For simplicity, we aggregate all counts into one. We will use the first data point (August 10, 2021) as the initial condition for H, and the second data point (August 11, 2021) as day 1 of the scenario. To incorporate missed data reporting, we randomly remove `r missed_reports` records. +Daily COVID19 hospitalization data for Ontario was obtained from the [Ontario Data Catalogue](https://data.ontario.ca/dataset/covid-19-vaccine-data-in-ontario/resource/274b819c-5d69-4539-a4db-f2950794138c). The data contains daily hospitalization counts stratified by vaccination status and severity of hospitalization care. For simplicity, we aggregate all counts into one. We will use the first data point (August 10, 2021) as the initial condition for H, and the second data point (August 11, 2021) as day 1 of the scenario. To incorporate missed data reporting, we randomly remove `r missed_reports` records. 
```{r observed_data} set.seed(expected_daily_reports) diff --git a/man/distribution.Rd b/man/distribution.Rd index 119d1a51..cd385f77 100644 --- a/man/distribution.Rd +++ b/man/distribution.Rd @@ -9,21 +9,41 @@ \alias{mp_neg_bin} \title{Distributions} \usage{ -mp_uniform() +mp_uniform(default_trans = list()) -mp_normal(location = DistrParam("location"), sd) +mp_normal( + location = DistrParam("location"), + sd, + default_trans = list(location = mp_identity, sd = mp_log) +) -mp_log_normal(location = DistrParam("location"), sd) +mp_log_normal( + location = DistrParam("location"), + sd, + default_trans = list(location = mp_identity, sd = mp_identity) +) -mp_poisson(location = DistrParam("location")) +mp_poisson( + location = DistrParam("location"), + default_trans = list(location = mp_identity) +) -mp_neg_bin(location = DistrParam("location"), disp) +mp_neg_bin( + location = DistrParam("location"), + disp, + default_trans = list(location = mp_identity, disp = mp_log) +) } \arguments{ -\item{location}{Location parameter. Only necessary if used as a prior -distribution. If it is used as a likelihood component the location -parameter will be taken as the simulated variable being fitted to data, -and so this \code{location} parameter should be left to the default.} +\item{default_trans}{Named list of default transformations for each +distributional parameter. See \code{?transform_distr_param} for a list of +available transformations.} + +\item{location}{Location parameter. +Specifying the \code{location} parameter is only necessary when the distribution +is used as a prior distribution. If it is used as a likelihood component the +location parameter will be taken as the simulated variable being fitted to +data, and so this \code{location} parameter should be left to the default.} \item{sd}{Standard deviation parameter.} @@ -34,7 +54,8 @@ Distributions which can be used to specify prior or likelihood components in model calibration. 
\itemize{ -\item Uniform Distribution (Improper), only appropriate for prior components - \code{mp_uniform} +\item Uniform Distribution (Improper), only appropriate for prior +components - \code{mp_uniform} } \itemize{ @@ -53,3 +74,8 @@ model calibration. \item Negative Binomial Distribution - \code{mp_neg_bin} } } +\details{ +All distributional parameter arguments can be specified either as +a numeric value, a character string giving the parameter name, or a +distributional parameter object (See ?fit_distr_params). +} diff --git a/man/fit_distr_params.Rd b/man/fit_distr_params.Rd index 16ed6af0..c6ea60c1 100644 --- a/man/fit_distr_params.Rd +++ b/man/fit_distr_params.Rd @@ -21,9 +21,62 @@ the associated distribution. For example, the standard deviation parameter \code{sd} in the \code{\link{mp_normal}} distributions has a default log transformation specified using \code{\link{mp_log}}.} } +\value{ +A distributional parameter object. +} \description{ Distributional parameters can be added to the list of parameters that are fit during calibration. By default, distributional parameters in priors and likelihoods are not fit. Use \code{mp_nofit} to exclude distributional parameters from being fit and \code{mp_fit} to fit them. } +\examples{ + +# First we call the SIR model spec, and generate some data for calibration. +spec = mp_tmb_library("starter_models", "sir", package = "macpan2") +data = mp_simulator(spec, 50, "infection") |> mp_trajectory() + +# Suppose we want to specify a Normal prior on the transmission parameter +# beta, and we are interested in estimating the prior standard deviation. +# Here we use `mp_fit` to estimate the standard deviation, `sd`, and we +# provide a numeric starting value for `sd` in the optimization. 
+cal = mp_tmb_calibrator( + spec + , data + , traj = "infection" + , par = list(beta = mp_normal(location = 0.35, sd = mp_fit(0.1))) + , default = list(beta = 0.25) +) + +# When viewing the calibration objective function we can see the additional +# prior density term added for beta. The standard deviation parameter has +# been automatically named 'distr_params_log_sd_beta'. +cal$simulator$tmb_model$obj_fn$obj_fn_expr + +# Next we optimize and view the fitted parameters. We can see the +# distributional parameter in the coefficient table with a default value +# equal to the numeric value we provided to `mp_fit` above. +mp_optimize(cal) +mp_tmb_coef(cal) + +# If instead we want control over the name of the new fitted distributional +# parameter, we can add a new variable to our model specification with the +# default value set to the desired optimization starting value. +updated_spec = spec |> mp_tmb_insert(default = list(sd_var = 0.1)) + +# In the calibrator, we use the name of this newly added variable, "sd_var", +# as input to `mp_fit`. +cal = mp_tmb_calibrator( + updated_spec + , data + , traj = "infection" + , par = list(beta = mp_normal(location = 0.35, sd = mp_fit("sd_var"))) + , default = list(beta = 0.25) +) + +# We can see this distributional parameter get propagated to the objective +# function and the fitted parameter table. 
+cal$simulator$tmb_model$obj_fn$obj_fn_expr +mp_optimize(cal) +mp_tmb_coef(cal) +} diff --git a/tests/testthat/test-distributions.R b/tests/testthat/test-distributions.R index d6a91038..d5b5c2a0 100644 --- a/tests/testthat/test-distributions.R +++ b/tests/testthat/test-distributions.R @@ -190,21 +190,43 @@ test_that("default distributional parameter transformation is consistent", { test_that("misspecification of distributional parameters results in the appropriate errors",{ - spec = (mp_tmb_library("starter_models", "sir", package = "macpan2") + spec_emptydefault = (mp_tmb_library("starter_models", "sir", package = "macpan2") |> mp_tmb_insert(default = list(sd = empty_matrix)) ) - sir_data = (mp_simulator(spec, time_steps = 5, outputs = c("I")) + spec_nodefault = (mp_tmb_library("starter_models", "sir", package = "macpan2") + # insert dummy expression to add variable to the spec + |> mp_tmb_insert(expressions = list(sd ~ sd)) + ) + sir_data = (mp_simulator(spec_emptydefault, time_steps = 5, outputs = c("I")) |> mp_trajectory() ) # character misspecification in mp_fit, variable doesn't exist in model - expect_error(mp_tmb_calibrator(spec + expect_error(mp_tmb_calibrator(spec_emptydefault , data = sir_data , traj = list(I = mp_normal(sd = mp_fit("Sd"))) , par = "beta" ) , regexp = "Sd is not in the model spec" ) + + # correct specification in mp_fit, but variable default is `empty_matrix` + expect_error(mp_tmb_calibrator(spec_emptydefault + , data = sir_data + , traj = list(I = mp_normal(sd = mp_fit("sd"))) + , par = "beta" + ) + ) + + # correct specification in mp_fit, but variable has no default + expect_error(mp_tmb_calibrator(spec_nodefault + , data = sir_data + , traj = list(I = mp_normal(sd = mp_fit("sd"))) + , par = "beta" + ) + , regexp = "sd is not in the model spec" + ) + }) test_that("default transformations for distributional parameters can be updated",{ diff --git a/vignettes/likelihood_prior_specs.Rmd b/vignettes/likelihood_prior_specs.Rmd index 
7959e399..36baf43c 100644 --- a/vignettes/likelihood_prior_specs.Rmd +++ b/vignettes/likelihood_prior_specs.Rmd @@ -41,20 +41,23 @@ By default, the interface assumes **poisson likelihoods** and **uniform priors** To specify a likelihood and/or prior for a variable in our model we can select a distribution from the list of available distributions,`?macpan2::distribution`. -## Prior Specification +# Use Cases + +## Priors on model parameters Specifying priors is usually done through the `par` argument of `mp_tmb_calibrator`. [Here](https://github.com/canmod/macpan2/tree/main/inst/starter_models/shiver#re-parameterizing-and-introducing-transformations) is an example of prior specification in the SHIVER model. The parameter `logit_p`, the logit transformed proportion `p`, is given a normal prior using the distribution function `macpan2::mp_normal` and two numeric inputs for the `location` and standard deviation, `sd`. The remaining parameters are given an improper uniform prior using `macpan2::mp_uniform`. -## Likelihood Specification +## Likelihoods on trajectory variables A likelihood can be specified for the trajectory variables in our calibration set-up, those identified in the `traj` argument of `mp_tmb_calibrator`. Further into the SHIVER [example](https://github.com/canmod/macpan2/tree/main/inst/starter_models/shiver#fitting-to-multiple-trajectories), the variables hospitalizations(`H`) and `reported_incidence` are both specified with a negative binomial likelihood using the `macpan2::mp_neg_bin` function. For likelihoods the location parameter is not set because the calibration machinery will use the simulated value for this trajectory as the location. The dispersion parameter for `mp_neg_bin` is required. -## Fitting Distributional Parameters + +## Fixed distributional parameters Distributional parameters are those parameters that characterize the distribution. Often these are the location and standard deviation. 
By default, these parameters are assumed fixed and not fit. This was the case in the previous [Prior Specification] example where the distributional parameters for `location` and `sd` were specified as numeric constants. @@ -62,11 +65,21 @@ Distributional parameters are also assumed fixed when set to the name of an exis Ex. `mp_normal(sd = "sd_var")` +## Fitting Distributional Parameters + Distributional parameters however, can be fit in the calibration framework in addition to other parameters using `macpan2::mp_fit`. See `?macpan2::fit_distr_params` for details. The previous example in [Likelihood Specification] shows the negative binomial dispersion parameter being fit with `mp_fit`. The numeric value provided for dispersion is the starting value for the optimization routine. After optimization, we can see the fitted dispersion distributional parameters in the coefficient table. By default, they are named with a leading `distr_params_` followed by their distributional parameter name and corresponding model variable name. ### Distributional Parameter Transformations -Distributional parameters have default parameter transformations inherited from their respective distribution. For example, standard deviations by definition are a strictly positive number, so the log transformation is convenient to use to ensure this condition is met. +Distributional parameters have default parameter transformations inherited from their respective distribution. See the `default_trans` argument for each distribution (`?macpan2::distribution`). For example, standard deviations by definition are a strictly positive number, so the log transformation is convenient to use to ensure this condition is met. + +A single default transformation can be changed by passing a distributional parameter transformation function (see `?macpan2::transform_distr_param`) to the `trans` argument of the `?macpan2::fit_distr_params` functions. 
To update all transformations, a named list of transformations from `?macpan2::transform_distr_param` for each distributional parameter can be given to the `default_trans` argument of the distribution. -Defaults can be changed by passing a distributional parameter transformation function `?macpan2::transform_distr_param` to the `trans` argument in `?macpan2::fit_distr_params` functions. +## Priors on distributional parameters + +``` + + + +```