Merge pull request #33 from CDCgov/toy-model-generate-data

Toy model generate data
CDCgov · Feb 13, 2024 · e786bfb · e786bfb
2 parents 21c3110 + 89ce390
commit e786bfb
Show file tree

Hide file tree

Showing 7 changed files with 225 additions and 25 deletions.
diff --git a/EpiAware/src/epimodel.jl b/EpiAware/src/epimodel.jl
@@ -1,24 +1,24 @@
 abstract type AbstractEpiModel end
 
 
+
 """
-    struct EpiModel{T}
+    struct EpiModel{T<:Real} <: AbstractEpiModel
 
-The `EpiModel` struct represents a basic renewal model.
+EpiModel represents an epidemiological model with generation intervals, delay intervals, and observation delay kernel.
 
 # Fields
-- `gen_int::Vector{T}`: Discrete generation distribution.
-- `delay_int::Vector{T}`: Discrete delay distribution.
-- `delay_kernel::SparseMatrixCSC{T,Integer}`: Sparse matrix representing the action of convoluting
-    a series of infections with observation delay.
-- `cluster_coeff::T`: Cluster coefficient for negative binomial distribution of observations.
-- `len_gen_int::Integer`: Length of `gen_int` vector.
-- `len_delay_int::Integer`: Length of `delay_int` vector.
+- `gen_int::Vector{T}`: Discrete generation inteval, runs from 1, 2, ... to the end of the vector.
+- `delay_int::Vector{T}`: Discrete delay distribution runs from 0, 1, ... to the end of the vector less 1.
+- `delay_kernel::SparseMatrixCSC{T,Integer}`: Sparse matrix representing the observation delay kernel.
+- `cluster_coeff::T`: Cluster coefficient for negative binomial observations.
+- `len_gen_int::Integer`: Length of `gen_int`.
+- `len_delay_int::Integer`: Length of `delay_int`.
+- `time_horizon::Integer`: Length of the generated data.
 
 # Constructors
-- `EpiModel(gen_int, delay_int, truth_cluster_coeff, time_horizon)`: Constructs an `EpiModel` object.
-
-The `EpiModel` struct represents an epidemiological model with generation intervals, delay intervals, delay kernel, truth cluster coefficient, and length of generation and delay intervals.
+- `EpiModel(gen_int, delay_int, cluster_coeff, time_horizon::Integer)`: Constructs an EpiModel object with given generation intervals, delay intervals, cluster coefficient, and time horizon.
+- `EpiModel(gen_distribution::ContinuousDistribution, delay_distribution::ContinuousDistribution, cluster_coeff, time_horizon::Integer; Δd = 1.0, D_gen, D_delay)`: Constructs an EpiModel object with generation and delay distributions, cluster coefficient, time horizon, and optional parameters.
 
 """
 struct EpiModel{T<:Real} <: AbstractEpiModel
@@ -28,14 +28,15 @@ struct EpiModel{T<:Real} <: AbstractEpiModel
     cluster_coeff::T
     len_gen_int::Integer #length(gen_int) just to save recalc
     len_delay_int::Integer #length(delay_int) just to save recalc
+    time_horizon::Integer
 
     #Inner constructors for EpiModel object
-    function EpiModel(gen_int, delay_int, cluster_coeff, time_horizon)
+    function EpiModel(gen_int, delay_int, cluster_coeff, time_horizon::Integer)
         @assert all(gen_int .>= 0) "Generation interval must be non-negative"
         @assert all(delay_int .>= 0) "Delay interval must be non-negative"
         @assert sum(gen_int) ≈ 1 "Generation interval must sum to 1"
         @assert sum(delay_int) ≈ 1 "Delay interval must sum to 1"
-        #construct observation delay kernel
+
         K = generate_observation_kernel(delay_int, time_horizon)
 
         new{eltype(gen_int)}(
@@ -45,14 +46,15 @@ struct EpiModel{T<:Real} <: AbstractEpiModel
             cluster_coeff,
             length(gen_int),
             length(delay_int),
+            time_horizon,
         )
     end
 
     function EpiModel(
         gen_distribution::ContinuousDistribution,
         delay_distribution::ContinuousDistribution,
         cluster_coeff,
-        time_horizon;
+        time_horizon::Integer;
         Δd = 1.0,
         D_gen,
         D_delay,
@@ -62,8 +64,6 @@ struct EpiModel{T<:Real} <: AbstractEpiModel
             p -> p[2:end] ./ sum(p[2:end])
         delay_int = create_discrete_pmf(delay_distribution, Δd = Δd, D = D_delay)
 
-        #construct observation delay kernel
-        #Recall first element is zero delay
         K = generate_observation_kernel(delay_int, time_horizon)
 
         new{eltype(gen_int)}(
@@ -73,6 +73,7 @@ struct EpiModel{T<:Real} <: AbstractEpiModel
             cluster_coeff,
             length(gen_int),
             length(delay_int),
+            time_horizon,
         )
     end
 end

diff --git a/EpiAware/src/latent-processes.jl b/EpiAware/src/latent-processes.jl
@@ -1,3 +1,7 @@
+const STANDARD_RW_PRIORS =
+    (var_RW_dist = truncated(Normal(0.0, 0.05), 0.0, Inf), init_rw_value_dist = Normal())
+
+
 """
     random_walk(n, ϵ_t = missing; latent_process_priors = (var_RW_dist = truncated(Normal(0., 0.05), 0., Inf),), ::Type{T} = Float64) where {T <: Real}
 
@@ -16,12 +20,13 @@ Constructs a random walk model.
     n,
     ϵ_t = missing,
     ::Type{T} = Float64;
-    latent_process_priors = (var_RW_dist = truncated(Normal(0.0, 0.05), 0.0, Inf),),
+    latent_process_priors = STANDARD_RW_PRIORS,
 ) where {T<:Real}
     rw = Vector{T}(undef, n)
     ϵ_t ~ MvNormal(ones(n))
     σ²_RW ~ latent_process_priors.var_RW_dist
+    init_rw_value ~ latent_process_priors.init_rw_value_dist
     σ_RW = sqrt(σ²_RW)
-    rw .= cumsum(σ_RW * ϵ_t)
-    return rw, (; σ_RW)
+    rw .= init_rw_value .+ cumsum(σ_RW * ϵ_t)
+    return rw, (; σ_RW, init_rw_value)
 end
diff --git a/EpiAware/src/models.jl b/EpiAware/src/models.jl
@@ -1,10 +1,39 @@
+"""
+    log_infections(y_t, epimodel::EpiModel, latent_process;
+                   latent_process_priors,
+                   transform_function = exp,
+                   n_generate_ahead = 0,
+                   pos_shift = 1e-6,
+                   neg_bin_cluster_factor = missing,
+                   neg_bin_cluster_factor_prior = Gamma(3, 0.05 / 3))
+
+A Turing model for Log-infections undelying observed epidemiological data.
+
+This function defines a log-infections model for epidemiological data.
+It takes the observed data `y_t`, an `EpiModel` object `epimodel`, and a `latent_process`
+model. It also accepts optional arguments for the `latent_process_priors`, `transform_function`,
+`n_generate_ahead`, `pos_shift`, `neg_bin_cluster_factor`, and `neg_bin_cluster_factor_prior`.
+
+## Arguments
+- `y_t`: Observed data.
+- `epimodel`: Epidemiological model.
+- `latent_process`: Latent process model.
+- `latent_process_priors`: Priors for the latent process model.
+- `transform_function`: Function to transform the latent process into infections. Default is `exp`.
+- `n_generate_ahead`: Number of time steps to generate ahead. Default is `0`.
+- `pos_shift`: Positive shift to avoid zero values. Default is `1e-6`.
+- `neg_bin_cluster_factor`: Missing value for the negative binomial cluster factor. Default is `missing`.
+- `neg_bin_cluster_factor_prior`: Prior distribution for the negative binomial cluster factor. Default is `Gamma(3, 0.05 / 3)`.
+
+## Returns
+A named tuple containing the generated quantities `I_t` and `latent_process_parameters`.
+"""
 @model function log_infections(
     y_t,
     epimodel::EpiModel,
     latent_process;
     latent_process_priors,
     transform_function = exp,
-    n_generate_ahead = 0,
     pos_shift = 1e-6,
     neg_bin_cluster_factor = missing,
     neg_bin_cluster_factor_prior = Gamma(3, 0.05 / 3),
@@ -13,16 +42,17 @@
     neg_bin_cluster_factor ~ neg_bin_cluster_factor_prior
 
     #Latent process
-    time_steps = length(y_t) + n_generate_ahead
+    time_steps = epimodel.time_horizon
     @submodel _I_t, latent_process_parameters =
-        latent_process(data_length; latent_process_priors = latent_process_priors)
+        latent_process(time_steps; latent_process_priors = latent_process_priors)
 
     #Transform into infections
     I_t = transform_function.(_I_t)
 
     #Predictive distribution
     case_pred_dists =
-        (epimodel.delay_kernel * I_t) .+ pos_shift .|> μ -> mean_cc_neg_bin(μ, α)
+        (epimodel.delay_kernel * I_t) .+ pos_shift .|>
+        μ -> mean_cc_neg_bin(μ, neg_bin_cluster_factor)
 
     #Likelihood
     y_t ~ arraydist(case_pred_dists)

diff --git a/EpiAware/test/Project.toml b/EpiAware/test/Project.toml
@@ -4,6 +4,7 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
 TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe"

diff --git a/EpiAware/test/predictive_checking/toy_model_log_infs_RW.jl b/EpiAware/test/predictive_checking/toy_model_log_infs_RW.jl
@@ -0,0 +1,123 @@
+using Pkg: generate
+#=
+# Toy model for running analysis:
+
+This is a toy model for demonstrating current functionality of EpiAware package.
+
+## Generative Model without data
+
+### Latent Process
+
+The latent process is a random walk defined by a Turing model `random_walk` of specified length `n`.
+
+_Unfixed parameters_:
+- `σ²_RW`: The variance of the random walk process. Current defauly prior is
+- `init_rw_value`: The initial value of the random walk process.
+- `ϵ_t`: The random noise vector.
+
+```math
+\begin{align}
+X(t) &= X(0) + \sigma_{RW} \sum_{t= 1}^n \epsilon_t \\
+X(0) &\sim \mathcal{N}(0, 1) \\
+\epsilon_t &\sim \mathcal{N}(0, 1) \\
+\sigma_{RW} &\sim \text{HalfNormal}(0.05).
+\end{align}
+```
+
+### Log-Infections Model
+
+The log-infections model is defined by a Turing model `log_infections` that takes the observed data `y_t` (or `missing` value),
+an `EpiModel` object `epimodel`, and a `latent_process` model. In this case the latent process is a random walk model.
+
+It also accepts optional arguments for the `latent_process_priors`, `transform_function`, `pos_shift`, `neg_bin_cluster_factor`, and `neg_bin_cluster_factor_prior`.
+
+```math
+\log I_t = \exp(X(t)).
+```
+
+### Observation model
+
+The observation model is a negative binomial distribution with mean `μ` and cluster factor `r`. Delays are implemented
+as the action of a sparse kernel on the infections $I(t)$. The delay kernel is contained in an `EpiModel` struct.
+
+```math
+\begin{align}
+y_t &\sim \text{NegBinomial}(\mu = \sum_s\geq 0 K[t, t-s] I(s), r),
+r &\sim \text{Gamma}(3, 0.05/3).
+\end{align}
+```
+
+## Load dependencies in `TestEnv`
+
+=#
+
+split(pwd(), "/")[end] != "EpiAware" && begin
+    cd("./EpiAware")
+    using Pkg
+    Pkg.activate(".")
+
+    using TestEnv
+    TestEnv.activate()
+end
+
+using EpiAware
+using Turing
+using Distributions
+using StatsPlots
+using Random
+using DynamicPPL
+Random.seed!(0)
+
+#=
+## Create an `EpiModel` struct
+Somewhat randomly chosen parameters for the `EpiModel` struct.
+
+=#
+
+truth_GI = Gamma(1, 2)
+truth_delay = Uniform(0.0, 5.0)
+neg_bin_cluster_factor = 0.05
+time_horizon = 100
+
+epimodel = EpiModel(
+    truth_GI,
+    truth_delay,
+    neg_bin_cluster_factor,
+    time_horizon,
+    D_gen = 10.0,
+    D_delay = 10.0,
+)
+
+#=
+## Define a log-infections model
+The log-infections model is defined by a Turing model `log_infections`.
+
+In this case we don't have observed data, so we use `missing` value for `y_t`.
+=#
+toy_log_infs = log_infections(
+    missing,
+    epimodel,
+    random_walk;
+    latent_process_priors = EpiAware.STANDARD_RW_PRIORS,
+)
+
+#=
+## Sample from the model
+I define a fixed version of the model with initial infections set to 10 and variance of the random walk process set to 0.1.
+We can sample from the model using the `rand` function, and plot the generated infections against generated cases.
+=#
+cond_toy = fix(toy_log_infs, (init_rw_value = log(10.0), σ²_RW = 0.1))
+random_epidemic = rand(cond_toy)
+
+# We can get the generated infections using `generated_quantities` function. Because the observed
+# cases are "defined" with a `~` operator they can be accessed directly from the randomly sampled
+# process.
+gen = generated_quantities(cond_toy, random_epidemic)
+plot(
+    gen.I_t,
+    label = "I_t",
+    xlabel = "Time",
+    ylabel = "Infections",
+    title = "Generated Infections",
+)
+scatter!(X.y_t, lab = "generated cases")
diff --git a/EpiAware/test/test_latent-processes.jl b/EpiAware/test/test_latent-processes.jl
@@ -3,7 +3,7 @@
     using DynamicPPL, Turing
     n = 5
     model = random_walk(n)
-    fixed_model = fix(model, (σ²_RW = 1.0,)) #Fixing the standard deviation of the random walk process
+    fixed_model = fix(model, (σ²_RW = 1.0, init_rw_value = 0.0)) #Fixing the standard deviation of the random walk process
     n_samples = 1000
     samples_day_5 =
         sample(fixed_model, Prior(), n_samples) |>

diff --git a/EpiAware/test/test_models.jl b/EpiAware/test/test_models.jl
@@ -0,0 +1,40 @@
+
+@testitem "`log_infections` with RW latent process" begin
+    using Distributions, Turing, DynamicPPL
+    # Define test inputs
+    y_t = missing # Data will be generated from the model
+    epimodel = EpiModel([0.2, 0.3, 0.5], [0.1, 0.4, 0.5], 0.8, 10)
+    latent_process_priors = EpiAware.STANDARD_RW_PRIORS
+    transform_function = exp
+    n_generate_ahead = 0
+    pos_shift = 1e-6
+    neg_bin_cluster_factor = 0.5
+    neg_bin_cluster_factor_prior = Gamma(3, 0.05 / 3)
+
+    # Call the function
+    test_mdl = log_infections(
+        y_t,
+        epimodel,
+        random_walk;
+        latent_process_priors,
+        transform_function,
+        pos_shift,
+        neg_bin_cluster_factor,
+        neg_bin_cluster_factor_prior,
+    )
+
+    # Define expected outputs for a conditional model
+    # Underlying log-infections are const value 1 for all time steps and
+    # any other unfixed parameters
+
+    fixed_test_mdl = fix(
+        test_mdl,
+        (init_rw_value = log(1.0), σ²_RW = 0.0, neg_bin_cluster_factor = 0.05),
+    )
+    X = rand(fixed_test_mdl)
+    expected_I_t = [1.0 for _ = 1:epimodel.time_horizon]
+    gen = generated_quantities(fixed_test_mdl, rand(fixed_test_mdl))
+
+    # Perform tests
+    @test gen.I_t ≈ expected_I_t
+end