Skip to content

Commit

Permalink
Merge pull request #10 from CDCgov/SamuelBrand1/issue7
Browse files Browse the repository at this point in the history
Add double censoring discretization
  • Loading branch information
SamuelBrand1 authored Feb 12, 2024
2 parents 7362a2e + 28fd44a commit dc7c5c4
Show file tree
Hide file tree
Showing 6 changed files with 199 additions and 12 deletions.
1 change: 1 addition & 0 deletions EpiAware/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
Optim = "429524aa-4258-5aef-a3af-852621145aeb"
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Expand Down
3 changes: 2 additions & 1 deletion EpiAware/src/EpiAware.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ using Distributions,
Random,
ReverseDiff,
Optim,
Parameters
Parameters,
QuadGK

export scan,
create_discrete_pmf,
Expand Down
64 changes: 59 additions & 5 deletions EpiAware/src/utilities.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,19 @@ function scan(f, init, xs)
end

"""
create_discrete_pmf(dist; Δd = 1.0, D)
create_discrete_pmf(dist::Distribution, ::Val{:single_censored}; primary_approximation_point = 0.5, Δd = 1.0, D)
Create a discrete probability mass function (PMF) from a given distribution.
Create a discrete probability mass function (PMF) from a given distribution, assuming that the
primary event happens at `primary_approximation_point * Δd` within an initial censoring interval. Common
single-censoring approximations are `primary_approximation_point = 0` (left-hand approximation),
`primary_approximation_point = 1` (right-hand) and `primary_approximation_point = 0.5` (midpoint).
Arguments:
- `dist`: The distribution from which to create the PMF.
- `::Val{:single_censored}`: A dummy argument used to dispatch to this method. Requiring the `Val`
argument makes using the single-censored approximation an active decision by the caller.
- `primary_approximation_point`: An approximation point for the primary event time within its censoring interval,
as a fraction of `Δd` in `[0, 1]`. Default is 0.5 for the midpoint approximation.
- `Δd`: The step size for discretizing the domain. Default is 1.0.
- `D`: The upper bound of the domain. Must be greater than `Δd`.
Expand All @@ -45,16 +52,63 @@ Raises:
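- `AssertionError` if `Δd` is not positive.
- `AssertionError` if `D` is not greater than `Δd`.
- `AssertionError` if `primary_approximation_point` is not in `[0, 1]`.
- `AssertionError` if `D` is not a multiple of `Δd`.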
"""
function create_discrete_pmf(
    dist::Distribution,
    ::Val{:single_censored};
    primary_approximation_point = 0.5,
    Δd = 1.0,
    D,
)
    # Single-censored discretization: the primary event is pinned to one point
    # (`primary_approximation_point * Δd`) of its censoring interval, and the PMF
    # is obtained by differencing the CDF of `dist` on the resulting grid.
    # `@assert` is kept deliberately: callers and tests rely on `AssertionError`.
    @assert minimum(dist) >= 0.0 "Distribution must be non-negative"
    @assert Δd > 0.0 "Δd must be positive"
    @assert D > Δd "D must be greater than Δd"
    @assert primary_approximation_point >= 0.0 && primary_approximation_point <= 1.0 "`primary_approximation_point` must be in [0,1]."

    # Right-hand edges of the secondary-event intervals: Δd, 2Δd, ..., D.
    ts = Δd:Δd:D |> collect
    @assert ts[end] == D "D must be a multiple of Δd."

    # Prepend the assumed primary event time as the first evaluation point.
    # When `primary_approximation_point == 1` this duplicates `Δd`, so the first
    # PMF entry is zero (no mass in the first interval).
    ts = [primary_approximation_point * Δd; ts]

    # Difference the CDF across the grid, then renormalise so the truncated
    # PMF sums to one.
    ts .|> (t -> cdf(dist, t)) |> diff |> p -> p ./ sum(p)
end

"""
create_discrete_pmf(dist::Distribution; Δd = 1.0, D)
Create a discrete probability mass function (PMF) from a given distribution, assuming
a uniform distribution over primary event times with censoring intervals of width `Δd` for
both primary and secondary events. The CDF for the time from the left edge of the interval
containing the primary event to the secondary event is created by direct numerical integration
of the convolution of the CDF of `dist` with the uniform density on `[0,Δd)`, the discrete PMF
for double censored delays is then found using simple differencing on the CDF.
Arguments:
- `dist`: The distribution from which to create the PMF.
- `Δd`: The step size for discretizing the domain. Default is 1.0.
- `D`: The upper bound of the domain. Must be greater than `Δd` and a multiple of `Δd`.
Returns:
- A vector representing the PMF.
Raises:
- `AssertionError` if the minimum value of `dist` is negative.
- `AssertionError` if `Δd` is not positive.
- `AssertionError` if `D` is not greater than `Δd`.
- `AssertionError` if `D` is not a multiple of `Δd`.
"""
function create_discrete_pmf(dist::Distribution; Δd = 1.0, D)
    # Input checks; `@assert` is kept because callers/tests expect `AssertionError`.
    @assert minimum(dist) >= 0.0 "Distribution must be non-negative."
    @assert Δd > 0.0 "Δd must be positive."
    @assert D > Δd "D must be greater than Δd."

    # Interval edges 0, Δd, 2Δd, ..., D on which the censored CDF is evaluated.
    grid = collect(0.0:Δd:D)
    @assert grid[end] == D "D must be a multiple of Δd."

    # Average the delay CDF over a primary event time that is uniform on
    # [0, Δd): numerically integrate cdf(dist, t - u) / Δd for u from 0 to Δd.
    # `quadgk` returns (integral, error estimate); only the integral is used.
    censored_cdf = map(grid) do t
        first(quadgk(u -> cdf(dist, t - u) / Δd, 0.0, Δd))
    end

    # Per-interval mass from differencing, renormalised to sum to one.
    masses = diff(censored_cdf)
    return masses ./ sum(masses)
end

"""
growth_rate_to_reproductive_ratio(r, w)
Expand Down
107 changes: 107 additions & 0 deletions EpiAware/test/predictive_checking/discretized_pmfs.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#=
# Discretized PMFs
## Analytical PMF for the Exponential distribution
For unit testing it is useful to have an analytically solvable example of double interval censoring. The simplest distribution
that we could solve analytically, while still not being completely trivial, was a delay of $X \sim \text{Exp}(1)$ days with daily interval censoring.
Without loss of generality, the censored primary observation time is $t = 0$, and we use double interval censoring with the primary
event time approximated as uniform on its interval, as per [here](https://www.medrxiv.org/content/10.1101/2024.01.12.24301247v1).
For above example the secondary censored obs time can be $s = 0, 1, 2,...$ days. The probability mass function is:
```math
P_S(s) = \int_0^1 \int_s^{s+1} f_X(y-x)dy dx.
```
This splits into two cases: $s = 0$ and $s \geq 1$.
**Case 1:** $s=0$
```math
P_S(0) = \int_0^1 \int_x^1 \exp(-(y - x)) dy dx = \exp(-1).
```
_NB: the density is zero for negative values._
**Case 2:** $s \geq 1$
```math
P_S(s) = \int_0^1 \int_s^{s+1} \exp(-(y - x)) dy dx = (1 - \exp(-1)) (\exp(1) - 1) \exp(-s).
```
We can directly check that the above is a discrete probability distribution. First, non-negativity is obvious. Second,
normalisation to 1 can be directly calculated,
```math
\begin{align}
\sum_{s \geq 1} P_S(s)&= (1 - \exp(-1)) (\exp(1) - 1) \sum_{s \geq 1} \exp(-s) \\
&= (1 - \exp(-1)) (\exp(1) - 1) {\exp(-1) \over 1 - \exp(-1)} \\
& = 1 - \exp(-1).
\end{align}
```
Therefore,
```math
\sum_{s \geq 0} P_S(s) = 1.
```
## Predictive checking for the `create_discrete_pmf` function
This predictive checking shows the difference between the two methods of the `create_discrete_pmf` function
for creating a discrete PMF from a continuous distribution with a given discretization interval `Δd` and upper bound `D`.
The default method is double censoring based on censoring intervals of width `Δd`. The single-censored method
(`Val(:single_censored)`) uses the same intervals but assumes that the primary event happens at a fixed point
`primary_approximation_point * Δd` within its censoring interval (the midpoint by default). Choosing the left edge implies that
the discrete PMF starts at `0`; choosing the right edge implies that the discrete PMF starts at `Δd`.
=#
using EpiAware
using StatsPlots
using Distributions

# Example distribution is a Gamma distribution with shape 2 and scale 3/2 (mean = 3 days, std = √4.5 days) with an upper bound of 21 days.

cont_dist = Gamma(2, 3.0 / 2)
D = 21.0

# For daily censoring there is a fairly big difference between the two methods, as well as left/right interval endpointing.

plt1 = let Δd = 1
    # One bar position per discretization interval: 0, Δd, ..., D - Δd.
    bar_positions = collect(0.0:Δd:(D - Δd))

    # Single-censored PMF (default midpoint primary approximation) and the
    # default double-censored PMF on the same grid.
    single_pmf = create_discrete_pmf(cont_dist, Val(:single_censored); Δd = Δd, D = D)
    double_pmf = create_discrete_pmf(cont_dist; Δd = Δd, D = D)

    # Side-by-side columns so each PMF is drawn as its own bar series.
    bar(
        bar_positions,
        hcat(single_pmf, double_pmf),
        fillalpha = 0.5,
        lw = 0,
        title = "Discrete PMF with Δd = 1 day",
        label = ["Single censoring (midpoint primary)" "Double Censoring"],
        xlabel = "Days",
        ylabel = "Probability",
    )
end
# Write the figure next to this script, under `assets/`.
savefig(plt1, joinpath(@__DIR__(), "assets/", "discrete_pmf_daily.png"))

# For hourly censoring the difference is not noticeable.

plt2 = let Δd = 1 / 24
    # One bar position per hourly interval (in days): 0, Δd, ..., D - Δd.
    bar_positions = collect(0.0:Δd:(D - Δd))

    # Single-censored PMF (default midpoint primary approximation) and the
    # default double-censored PMF on the same grid.
    single_pmf = create_discrete_pmf(cont_dist, Val(:single_censored); Δd = Δd, D = D)
    double_pmf = create_discrete_pmf(cont_dist; Δd = Δd, D = D)

    # Side-by-side columns so each PMF is drawn as its own bar series.
    bar(
        bar_positions,
        hcat(single_pmf, double_pmf),
        fillalpha = 0.5,
        lw = 0,
        title = "Discrete PMF with Δd = 1 hour",
        label = ["Single censoring (midpoint primary)" "Double Censoring"],
        xlabel = "Days",
        ylabel = "Probability",
    )
end
# Write the figure next to this script, under `assets/`.
savefig(plt2, joinpath(@__DIR__(), "assets/", "discrete_pmf_hourly.png"))
6 changes: 3 additions & 3 deletions EpiAware/test/test_latent-processes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
samples_day_5 =
sample(fixed_model, Prior(), n_samples) |>
chn -> mapreduce(vcat, generated_quantities(fixed_model, chn)) do gen
gen[1][5]
gen[1][5] #Extracting day 5 samples
end
#Check statistics are within 5 sigma
#Theoretically, after 5 steps distribution is N(0, var = 5)
Expand All @@ -20,9 +20,9 @@

#Theoretically, after 5 steps distribution is N(0, var = 5)



theoretical_std_of_empiral_var = std(Chisq(5)) / sqrt(n_samples)

@info "var = $(var(samples_day_5)); theoretical_std_of_empiral_var = $(theoretical_std_of_empiral_var)"
@test (var(samples_day_5) - 5) < 5 * theoretical_std_of_empiral_var &&
(var(samples_day_5) - 5) > -5 * theoretical_std_of_empiral_var

Expand Down
30 changes: 27 additions & 3 deletions EpiAware/test/test_utilities.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,40 @@ end
@test_throws AssertionError create_discrete_pmf(dist, Δd = 3.0, D = 1.0)
end

# Test case 4: Testing output against expected PMF basic version - single
# interval censoring with left hand approx.
@testset "Test case 4" begin
    dist = Exponential(1.0)
    # With the primary event at the left edge (t = 0), the mass on (t - 1, t]
    # is exp(-(t - 1)) - exp(-t), renormalised over the truncated domain [0, 5].
    expected_pmf = [(exp(-(t - 1)) - exp(-t)) / (1 - exp(-5)) for t = 1:5]
    pmf = create_discrete_pmf(
        dist,
        Val(:single_censored);
        primary_approximation_point = 0.0,
        Δd = 1.0,
        D = 5.0,
    )
    @test pmf ≈ expected_pmf atol = 1e-15
end

end
# Test case 5: Testing output against expected PMF basic version - double
# interval censoring
@testset "Test case 5" begin
    dist = Exponential(1.0)
    # Analytic double-censored masses for Exp(1) with daily intervals:
    # P(0) = exp(-1) and P(s) = (1 - exp(-1)) * (exp(1) - 1) * exp(-s) for s >= 1.
    expected_pmf_uncond = [
        exp(-1)
        [(1 - exp(-1)) * (exp(1) - 1) * exp(-s) for s = 1:9]
    ]
    # The function renormalises over the truncated domain, so do the same here.
    expected_pmf = expected_pmf_uncond ./ sum(expected_pmf_uncond)
    pmf = create_discrete_pmf(dist; Δd = 1.0, D = 10.0)
    @test expected_pmf ≈ pmf atol = 1e-15
end

@testset "Test case 6" begin
    # D = 3.5 is not a multiple of Δd = 1.0, so the grid check must fail.
    delay_dist = Exponential(1.0)
    @test_throws AssertionError create_discrete_pmf(delay_dist; Δd = 1.0, D = 3.5)
end

end
@testset "Testing growth_rate_to_reproductive_ratio function" begin
#Test that zero exp growth rate imples R0 = 1
@testset "Test case 1" begin
Expand Down

0 comments on commit dc7c5c4

Please sign in to comment.