greta-dev · goldingn · Jan 7, 2019 · Jan 7, 2019 · Jan 7, 2019 · Jan 7, 2019
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -65,6 +65,8 @@ Collate:
     'internals.R'
     'calculate.R'
     'callbacks.R'
+    'marginalise.R'
+    'marginalisers.R'
     'simulate.R'
     'chol2symm.R'
 Imports:

diff --git a/NAMESPACE b/NAMESPACE
@@ -99,6 +99,7 @@ S3method(plot,greta_model)
 S3method(print,greta_array)
 S3method(print,greta_model)
 S3method(print,initials)
+S3method(print,marginaliser)
 S3method(print,optimiser)
 S3method(print,sampler)
 S3method(print,summary.greta_array)
@@ -164,6 +165,7 @@ export(cov2cor)
 export(diag)
 export(dirichlet)
 export(dirichlet_multinomial)
+export(discrete_marginalisation)
 export(distribution)
 export(eigen)
 export(exponential)
@@ -188,12 +190,14 @@ export(iprobit)
 export(joint)
 export(l_bfgs_b)
 export(laplace)
+export(laplace_approximation)
 export(lkj_correlation)
 export(log10.greta_array)
 export(log1pe)
 export(log2.greta_array)
 export(logistic)
 export(lognormal)
+export(marginalise)
 export(mcmc)
 export(mixture)
 export(model)

diff --git a/R/dag_class.R b/R/dag_class.R
@@ -54,11 +54,16 @@ dag_class <- R6Class(
     # float type
     on_graph = function(expr) {
 
-      # temporarily pass float type info to options, so it can be accessed by
-      # nodes on definition, without cluncky explicit passing
+      # temporarily pass float type and batch size info to options, so it can be
+      # accessed by nodes on definition, without clunky explicit passing
       old_float_type <- options()$greta_tf_float
-      on.exit(options(greta_tf_float = old_float_type))
-      options(greta_tf_float = self$tf_float)
+      old_batch_size <- options()$greta_batch_size
+
+      on.exit(options(greta_tf_float = old_float_type,
+                      greta_batch_size = old_batch_size))
+
+      options(greta_tf_float = self$tf_float,
+              greta_batch_size = self$tf_environment$batch_size)
 
       with(self$tf_graph$as_default(), expr)
     },
@@ -375,7 +380,7 @@ dag_class <- R6Class(
     },
 
     # define tensor for overall log density and gradients
-    define_joint_density = function() {
+    define_joint_density = function(adjusted = TRUE) {
 
       tfe <- self$tf_environment
 
@@ -392,6 +397,12 @@ dag_class <- R6Class(
                           target_nodes,
                           SIMPLIFY = FALSE)
 
+      # assign the un-reduced densities, for use in marginalisation
+      names(densities) <- NULL
+      assign("component_densities",
+             densities,
+             envir = self$tf_environment)
+
       # reduce_sum each of them (skipping the batch dimension)
       self$on_graph(summed_densities <- lapply(densities, tf_sum, drop = TRUE))
 
@@ -404,43 +415,37 @@ dag_class <- R6Class(
              joint_density,
              envir = self$tf_environment)
 
-      # define adjusted joint density
+      if (adjusted) {
 
-      # get names of Jacobian adjustment tensors for all variable nodes
-      adj_names <- paste0(self$get_tf_names(types = "variable"), "_adj")
+        # get names of adjustment tensors for all variable nodes
+        adj_names <- paste0(self$get_tf_names(types = "variable"), "_adj")
 
-      # get TF density tensors for all distribution
-      adj <- lapply(adj_names, get, envir = self$tf_environment)
+        # get the TF adjustment tensors for all variable nodes
+        adj <- lapply(adj_names, get, envir = self$tf_environment)
 
-      # remove their names and sum them together (accounting for tfp bijectors
-      # sometimes returning a scalar tensor)
-      names(adj) <- NULL
-      adj <- match_batches(adj)
-      self$on_graph(total_adj <- tf$add_n(adj))
+        # pass them through match_batches (accounting for tfp bijectors
+        # sometimes returning a scalar tensor)
+        adj <- match_batches(adj)
 
-      # assign overall density to environment
-      assign("joint_density_adj",
-             joint_density + total_adj,
-             envir = self$tf_environment)
+        # remove their names and sum them together
+        names(adj) <- NULL
+        self$on_graph(total_adj <- tf$add_n(adj))
+
+        # assign overall density to environment
+        assign("joint_density_adj",
+               joint_density + total_adj,
+               envir = self$tf_environment)
+
+      }
 
     },
 
     # evaluate the (truncation-corrected) density of a tfp distribution on its
     # target tensor
     evaluate_density = function(distribution_node, target_node) {
 
-      tfe <- self$tf_environment
-
-      parameter_nodes <- distribution_node$parameters
-
-      # get the tensorflow objects for these
-      distrib_constructor <- self$get_tf_object(distribution_node)
+      tfp_distribution <- self$get_tf_object(distribution_node)
       tf_target <- self$get_tf_object(target_node)
-      tf_parameter_list <- lapply(parameter_nodes, self$get_tf_object)
-
-      # execute the distribution constructor functions to return a tfp
-      # distribution object
-      tfp_distribution <- distrib_constructor(tf_parameter_list, dag = self)
 
       self$tf_evaluate_density(tfp_distribution,
                                tf_target,
@@ -520,6 +525,9 @@ dag_class <- R6Class(
         for (name in data_names)
           tfe[[name]] <- tfe_old[[name]]
 
+        # copy the batch size over
+        tfe$batch_size <- tfe_old$batch_size
+
         # put the free state in the environment, and build out the tf graph
         tfe$free_state <- free_state
         self$define_tf_body()
@@ -775,25 +783,10 @@ dag_class <- R6Class(
 
     },
 
-    # get the tfp distribution object for a distribution node
-    get_tfp_distribution = function(distrib_node) {
-
-      # build the tfp distribution object for the distribution, and use it
-      # to get the tensor for the sample
-      distrib_constructor <- self$get_tf_object(distrib_node)
-      parameter_nodes <- distrib_node$parameters
-      tf_parameter_list <- lapply(parameter_nodes, self$get_tf_object)
-
-      # execute the distribution constructor functions to return a tfp
-      # distribution object
-      tfp_distribution <- distrib_constructor(tf_parameter_list, dag = self)
-
-    },
-
     # try to draw a random sample from a distribution node
     draw_sample = function(distribution_node) {
 
-      tfp_distribution <- self$get_tfp_distribution(distribution_node)
+      tfp_distribution <- self$get_tf_object(distribution_node)
 
       sample <- tfp_distribution$sample
 

diff --git a/R/inference.R b/R/inference.R
@@ -291,7 +291,6 @@ run_samplers <- function(samplers,
   thin <- as.integer(thin)
 
   dag <- samplers[[1]]$model$dag
-  chains <- samplers[[1]]$n_chains
   n_cores <- check_n_cores(n_cores, length(samplers), plan_is)
   float_type <- dag$tf_float
 

diff --git a/R/inference_class.R b/R/inference_class.R
@@ -46,8 +46,6 @@ inference <- R6Class(
       free_parameters <- model$dag$example_parameters(free = TRUE)
       free_parameters <- unlist_tf(free_parameters)
       self$n_free <- length(free_parameters)
-      self$set_initial_values(initial_values)
-      self$n_traced <- length(model$dag$trace_values(self$free_state))
       self$seed <- seed
 
     },
@@ -277,8 +275,6 @@ sampler <- R6Class(
                        parameters = parameters,
                        seed = seed)
 
-      self$n_chains <- nrow(self$free_state)
-
       # duplicate diag_sd if needed
       n_diag <- length(self$parameters$diag_sd)
       n_parameters <- self$n_free
@@ -289,6 +285,8 @@ sampler <- R6Class(
 
       # define the draws tensor on the tf graph
       self$define_tf_draws()
+      self$set_initial_values(initial_values)
+      self$n_chains <- nrow(self$free_state)
 
     },
 
@@ -1025,6 +1023,8 @@ optimiser <- R6Class(
 
       self$create_optimiser_objective()
       self$create_tf_minimiser()
+      self$set_initial_values(initial_values)
+      self$n_traced <- length(model$dag$trace_values(self$free_state))
 
     },
 

diff --git a/R/joint.R b/R/joint.R
@@ -124,15 +124,15 @@ joint_distribution <- R6Class(
 
     tf_distrib = function(parameters, dag) {
 
-      # get information from the *nodes* for component distributions, not the tf
-      # objects passed in here
+      # get tfp distributions
+      tfp_distributions <- parameters
+      names(tfp_distributions) <- NULL
 
-      # get tfp distributions, truncations, & bounds of component distributions
+      # get information on truncations & bounds of component distributions from
+      # the *nodes* for component distributions
       distribution_nodes <- self$parameters
       truncations <- lapply(distribution_nodes, member, "truncation")
       bounds <- lapply(distribution_nodes, member, "bounds")
-      tfp_distributions <- lapply(distribution_nodes, dag$get_tfp_distribution)
-      names(tfp_distributions) <- NULL
 
       log_prob <- function(x) {