Skip to content

Commit

Permalink
Merge pull request #33 from CDCgov/toy-model-generate-data
Browse files Browse the repository at this point in the history
Toy model generate data
  • Loading branch information
seabbs authored Feb 13, 2024
2 parents 21c3110 + 89ce390 commit e786bfb
Show file tree
Hide file tree
Showing 7 changed files with 225 additions and 25 deletions.
35 changes: 18 additions & 17 deletions EpiAware/src/epimodel.jl
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
abstract type AbstractEpiModel end



"""
struct EpiModel{T}
struct EpiModel{T<:Real} <: AbstractEpiModel
The `EpiModel` struct represents a basic renewal model.
EpiModel represents an epidemiological model with generation intervals, delay intervals, and observation delay kernel.
# Fields
- `gen_int::Vector{T}`: Discrete generation distribution.
- `delay_int::Vector{T}`: Discrete delay distribution.
- `delay_kernel::SparseMatrixCSC{T,Integer}`: Sparse matrix representing the action of convoluting
a series of infections with observation delay.
- `cluster_coeff::T`: Cluster coefficient for negative binomial distribution of observations.
- `len_gen_int::Integer`: Length of `gen_int` vector.
- `len_delay_int::Integer`: Length of `delay_int` vector.
- `gen_int::Vector{T}`: Discrete generation inteval, runs from 1, 2, ... to the end of the vector.
- `delay_int::Vector{T}`: Discrete delay distribution runs from 0, 1, ... to the end of the vector less 1.
- `delay_kernel::SparseMatrixCSC{T,Integer}`: Sparse matrix representing the observation delay kernel.
- `cluster_coeff::T`: Cluster coefficient for negative binomial observations.
- `len_gen_int::Integer`: Length of `gen_int`.
- `len_delay_int::Integer`: Length of `delay_int`.
- `time_horizon::Integer`: Length of the generated data.
# Constructors
- `EpiModel(gen_int, delay_int, truth_cluster_coeff, time_horizon)`: Constructs an `EpiModel` object.
The `EpiModel` struct represents an epidemiological model with generation intervals, delay intervals, delay kernel, truth cluster coefficient, and length of generation and delay intervals.
- `EpiModel(gen_int, delay_int, cluster_coeff, time_horizon::Integer)`: Constructs an EpiModel object with given generation intervals, delay intervals, cluster coefficient, and time horizon.
- `EpiModel(gen_distribution::ContinuousDistribution, delay_distribution::ContinuousDistribution, cluster_coeff, time_horizon::Integer; Δd = 1.0, D_gen, D_delay)`: Constructs an EpiModel object with generation and delay distributions, cluster coefficient, time horizon, and optional parameters.
"""
struct EpiModel{T<:Real} <: AbstractEpiModel
Expand All @@ -28,14 +28,15 @@ struct EpiModel{T<:Real} <: AbstractEpiModel
cluster_coeff::T
len_gen_int::Integer #length(gen_int) just to save recalc
len_delay_int::Integer #length(delay_int) just to save recalc
time_horizon::Integer

#Inner constructors for EpiModel object
function EpiModel(gen_int, delay_int, cluster_coeff, time_horizon)
function EpiModel(gen_int, delay_int, cluster_coeff, time_horizon::Integer)
@assert all(gen_int .>= 0) "Generation interval must be non-negative"
@assert all(delay_int .>= 0) "Delay interval must be non-negative"
@assert sum(gen_int) 1 "Generation interval must sum to 1"
@assert sum(delay_int) 1 "Delay interval must sum to 1"
#construct observation delay kernel

K = generate_observation_kernel(delay_int, time_horizon)

new{eltype(gen_int)}(
Expand All @@ -45,14 +46,15 @@ struct EpiModel{T<:Real} <: AbstractEpiModel
cluster_coeff,
length(gen_int),
length(delay_int),
time_horizon,
)
end

function EpiModel(
gen_distribution::ContinuousDistribution,
delay_distribution::ContinuousDistribution,
cluster_coeff,
time_horizon;
time_horizon::Integer;
Δd = 1.0,
D_gen,
D_delay,
Expand All @@ -62,8 +64,6 @@ struct EpiModel{T<:Real} <: AbstractEpiModel
p -> p[2:end] ./ sum(p[2:end])
delay_int = create_discrete_pmf(delay_distribution, Δd = Δd, D = D_delay)

#construct observation delay kernel
#Recall first element is zero delay
K = generate_observation_kernel(delay_int, time_horizon)

new{eltype(gen_int)}(
Expand All @@ -73,6 +73,7 @@ struct EpiModel{T<:Real} <: AbstractEpiModel
cluster_coeff,
length(gen_int),
length(delay_int),
time_horizon,
)
end
end
Expand Down
11 changes: 8 additions & 3 deletions EpiAware/src/latent-processes.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
const STANDARD_RW_PRIORS =
(var_RW_dist = truncated(Normal(0.0, 0.05), 0.0, Inf), init_rw_value_dist = Normal())


"""
random_walk(n, ϵ_t = missing; latent_process_priors = (var_RW_dist = truncated(Normal(0., 0.05), 0., Inf),), ::Type{T} = Float64) where {T <: Real}
Expand All @@ -16,12 +20,13 @@ Constructs a random walk model.
n,
ϵ_t = missing,
::Type{T} = Float64;
latent_process_priors = (var_RW_dist = truncated(Normal(0.0, 0.05), 0.0, Inf),),
latent_process_priors = STANDARD_RW_PRIORS,
) where {T<:Real}
rw = Vector{T}(undef, n)
ϵ_t ~ MvNormal(ones(n))
σ²_RW ~ latent_process_priors.var_RW_dist
init_rw_value ~ latent_process_priors.init_rw_value_dist
σ_RW = sqrt(σ²_RW)
rw .= cumsum(σ_RW * ϵ_t)
return rw, (; σ_RW)
rw .= init_rw_value .+ cumsum(σ_RW * ϵ_t)
return rw, (; σ_RW, init_rw_value)
end
38 changes: 34 additions & 4 deletions EpiAware/src/models.jl
Original file line number Diff line number Diff line change
@@ -1,10 +1,39 @@
"""
log_infections(y_t, epimodel::EpiModel, latent_process;
latent_process_priors,
transform_function = exp,
n_generate_ahead = 0,
pos_shift = 1e-6,
neg_bin_cluster_factor = missing,
neg_bin_cluster_factor_prior = Gamma(3, 0.05 / 3))
A Turing model for Log-infections undelying observed epidemiological data.
This function defines a log-infections model for epidemiological data.
It takes the observed data `y_t`, an `EpiModel` object `epimodel`, and a `latent_process`
model. It also accepts optional arguments for the `latent_process_priors`, `transform_function`,
`n_generate_ahead`, `pos_shift`, `neg_bin_cluster_factor`, and `neg_bin_cluster_factor_prior`.
## Arguments
- `y_t`: Observed data.
- `epimodel`: Epidemiological model.
- `latent_process`: Latent process model.
- `latent_process_priors`: Priors for the latent process model.
- `transform_function`: Function to transform the latent process into infections. Default is `exp`.
- `n_generate_ahead`: Number of time steps to generate ahead. Default is `0`.
- `pos_shift`: Positive shift to avoid zero values. Default is `1e-6`.
- `neg_bin_cluster_factor`: Missing value for the negative binomial cluster factor. Default is `missing`.
- `neg_bin_cluster_factor_prior`: Prior distribution for the negative binomial cluster factor. Default is `Gamma(3, 0.05 / 3)`.
## Returns
A named tuple containing the generated quantities `I_t` and `latent_process_parameters`.
"""
@model function log_infections(
y_t,
epimodel::EpiModel,
latent_process;
latent_process_priors,
transform_function = exp,
n_generate_ahead = 0,
pos_shift = 1e-6,
neg_bin_cluster_factor = missing,
neg_bin_cluster_factor_prior = Gamma(3, 0.05 / 3),
Expand All @@ -13,16 +42,17 @@
neg_bin_cluster_factor ~ neg_bin_cluster_factor_prior

#Latent process
time_steps = length(y_t) + n_generate_ahead
time_steps = epimodel.time_horizon
@submodel _I_t, latent_process_parameters =
latent_process(data_length; latent_process_priors = latent_process_priors)
latent_process(time_steps; latent_process_priors = latent_process_priors)

#Transform into infections
I_t = transform_function.(_I_t)

#Predictive distribution
case_pred_dists =
(epimodel.delay_kernel * I_t) .+ pos_shift .|> μ -> mean_cc_neg_bin(μ, α)
(epimodel.delay_kernel * I_t) .+ pos_shift .|>
μ -> mean_cc_neg_bin(μ, neg_bin_cluster_factor)

#Likelihood
y_t ~ arraydist(case_pred_dists)
Expand Down
1 change: 1 addition & 0 deletions EpiAware/test/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
DynamicPPL = "366bfd00-2699-11ea-058f-f148b4cae6d8"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
TestItemRunner = "f8b46487-2199-4994-9208-9a1283c18c0a"
TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe"
Expand Down
123 changes: 123 additions & 0 deletions EpiAware/test/predictive_checking/toy_model_log_infs_RW.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
using Pkg: generate
#=
# Toy model for running analysis:
This is a toy model for demonstrating current functionality of EpiAware package.
## Generative Model without data
### Latent Process
The latent process is a random walk defined by a Turing model `random_walk` of specified length `n`.
_Unfixed parameters_:
- `σ²_RW`: The variance of the random walk process. Current defauly prior is
- `init_rw_value`: The initial value of the random walk process.
- `ϵ_t`: The random noise vector.
```math
\begin{align}
X(t) &= X(0) + \sigma_{RW} \sum_{t= 1}^n \epsilon_t \\
X(0) &\sim \mathcal{N}(0, 1) \\
\epsilon_t &\sim \mathcal{N}(0, 1) \\
\sigma_{RW} &\sim \text{HalfNormal}(0.05).
\end{align}
```
### Log-Infections Model
The log-infections model is defined by a Turing model `log_infections` that takes the observed data `y_t` (or `missing` value),
an `EpiModel` object `epimodel`, and a `latent_process` model. In this case the latent process is a random walk model.
It also accepts optional arguments for the `latent_process_priors`, `transform_function`, `pos_shift`, `neg_bin_cluster_factor`, and `neg_bin_cluster_factor_prior`.
```math
\log I_t = \exp(X(t)).
```
### Observation model
The observation model is a negative binomial distribution with mean `μ` and cluster factor `r`. Delays are implemented
as the action of a sparse kernel on the infections $I(t)$. The delay kernel is contained in an `EpiModel` struct.
```math
\begin{align}
y_t &\sim \text{NegBinomial}(\mu = \sum_s\geq 0 K[t, t-s] I(s), r),
r &\sim \text{Gamma}(3, 0.05/3).
\end{align}
```
## Load dependencies in `TestEnv`
=#

split(pwd(), "/")[end] != "EpiAware" && begin
cd("./EpiAware")
using Pkg
Pkg.activate(".")

using TestEnv
TestEnv.activate()
end

using EpiAware
using Turing
using Distributions
using StatsPlots
using Random
using DynamicPPL
Random.seed!(0)

#=
## Create an `EpiModel` struct
Somewhat randomly chosen parameters for the `EpiModel` struct.
=#

truth_GI = Gamma(1, 2)
truth_delay = Uniform(0.0, 5.0)
neg_bin_cluster_factor = 0.05
time_horizon = 100

epimodel = EpiModel(
truth_GI,
truth_delay,
neg_bin_cluster_factor,
time_horizon,
D_gen = 10.0,
D_delay = 10.0,
)

#=
## Define a log-infections model
The log-infections model is defined by a Turing model `log_infections`.
In this case we don't have observed data, so we use `missing` value for `y_t`.
=#
toy_log_infs = log_infections(
missing,
epimodel,
random_walk;
latent_process_priors = EpiAware.STANDARD_RW_PRIORS,
)

#=
## Sample from the model
I define a fixed version of the model with initial infections set to 10 and variance of the random walk process set to 0.1.
We can sample from the model using the `rand` function, and plot the generated infections against generated cases.
=#
cond_toy = fix(toy_log_infs, (init_rw_value = log(10.0), σ²_RW = 0.1))
random_epidemic = rand(cond_toy)

# We can get the generated infections using `generated_quantities` function. Because the observed
# cases are "defined" with a `~` operator they can be accessed directly from the randomly sampled
# process.
gen = generated_quantities(cond_toy, random_epidemic)
plot(
gen.I_t,
label = "I_t",
xlabel = "Time",
ylabel = "Infections",
title = "Generated Infections",
)
scatter!(X.y_t, lab = "generated cases")
2 changes: 1 addition & 1 deletion EpiAware/test/test_latent-processes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
using DynamicPPL, Turing
n = 5
model = random_walk(n)
fixed_model = fix(model, (σ²_RW = 1.0,)) #Fixing the standard deviation of the random walk process
fixed_model = fix(model, (σ²_RW = 1.0, init_rw_value = 0.0)) #Fixing the standard deviation of the random walk process
n_samples = 1000
samples_day_5 =
sample(fixed_model, Prior(), n_samples) |>
Expand Down
40 changes: 40 additions & 0 deletions EpiAware/test/test_models.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@

@testitem "`log_infections` with RW latent process" begin
using Distributions, Turing, DynamicPPL
# Define test inputs
y_t = missing # Data will be generated from the model
epimodel = EpiModel([0.2, 0.3, 0.5], [0.1, 0.4, 0.5], 0.8, 10)
latent_process_priors = EpiAware.STANDARD_RW_PRIORS
transform_function = exp
n_generate_ahead = 0
pos_shift = 1e-6
neg_bin_cluster_factor = 0.5
neg_bin_cluster_factor_prior = Gamma(3, 0.05 / 3)

# Call the function
test_mdl = log_infections(
y_t,
epimodel,
random_walk;
latent_process_priors,
transform_function,
pos_shift,
neg_bin_cluster_factor,
neg_bin_cluster_factor_prior,
)

# Define expected outputs for a conditional model
# Underlying log-infections are const value 1 for all time steps and
# any other unfixed parameters

fixed_test_mdl = fix(
test_mdl,
(init_rw_value = log(1.0), σ²_RW = 0.0, neg_bin_cluster_factor = 0.05),
)
X = rand(fixed_test_mdl)
expected_I_t = [1.0 for _ = 1:epimodel.time_horizon]
gen = generated_quantities(fixed_test_mdl, rand(fixed_test_mdl))

# Perform tests
@test gen.I_t expected_I_t
end

0 comments on commit e786bfb

Please sign in to comment.