forked from tjmahr/MadR_RStanARM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbayesian_updating.Rmd
152 lines (124 loc) · 4.44 KB
/
bayesian_updating.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
---
title: "Bayesian updating demo"
author: "Tristan Mahr"
date: "September 20, 2016"
output: github_document
---
```{r setup, include=FALSE}
# Document-wide chunk defaults: echo chunk source, prefix printed output with
# "#>", and use knitr's `collapse` option.
knitr::opts_chunk$set(echo = TRUE, comment = "#>", collapse = TRUE)
```
```{r}
library("dplyr")
library("tidyr")
library("ggplot2")
# We have some IQ scores: the `IQbio` column of the `Burt` data in `car`.
iqs <- car::Burt$IQbio
iqs

# Goal: use Bayesian updating to estimate the mean and the sd of the data.

# Candidate parameter values. The prior is uniform across both ranges.
each_mean <- seq(from = 70, to = 130, by = 1)
each_sd <- 10:20

# Uniform prior: every candidate value is equally probable.
p_mean <- 1 / length(each_mean)
p_sd <- 1 / length(each_sd)

# Grid approximation: one row per combination of mean and sd. The joint prior
# is the product of the two marginal priors. No data has been seen yet, so the
# posterior starts out equal to the prior and `data_seen` is 0.
# `tibble()` is used because `data_frame()` is deprecated.
df <- tibble(each_mean, p_mean) %>%
  expand(nesting(each_mean, p_mean), nesting(each_sd, p_sd)) %>%
  mutate(
    prior = p_sd * p_mean,
    posterior = prior,
    data_seen = 0
  )
df
# Update the posterior over the mean-sd grid given some data.
#
# Arguments:
#   df    Grid with columns `each_mean`, `each_sd`, `posterior` and
#         `data_seen`. The incoming `posterior` is used as the prior.
#   data  Numeric vector of observations. Each one is scored with
#         `dnorm(x, each_mean, each_sd)`.
#
# Returns one row per distinct (each_mean, each_sd, prior) combination, sorted
# by those columns, with columns `each_mean`, `each_sd`, `prior`,
# `likelihood`, `posterior` (normalised to sum to 1) and `data_seen`
# (previous `data_seen` plus `length(data)`).
update_distribution <- function(df, data) {
  data_seen <- unique(df$data_seen) + length(data)

  # Last posterior is current prior. `distinct()` and `arrange()` keep the grid
  # de-duplicated and sorted by mean, sd and prior.
  grid <- df %>%
    select(each_mean, each_sd, prior = posterior) %>%
    distinct() %>%
    arrange(each_mean, each_sd, prior)

  # Log-likelihood of the whole data vector under each mean-sd pair. The data
  # is deliberately NOT attached with `expand()`: `expand()` keeps only the
  # unique values of each input, which would silently drop repeated
  # observations from the likelihood. Summing log densities also avoids the
  # underflow that a long product of small densities runs into.
  log_likelihood <- vapply(
    seq_len(nrow(grid)),
    function(i) {
      sum(dnorm(data, grid$each_mean[i], grid$each_sd[i], log = TRUE))
    },
    numeric(1)
  )

  # Bayes' theorem on the log scale. Subtracting the largest finite value
  # before exponentiating keeps at least one weight at 1, so the normalising
  # sum cannot underflow to zero when any pair has non-zero probability.
  log_unnormalised <- log_likelihood + log(grid$prior)
  finite <- is.finite(log_unnormalised)
  shift <- if (any(finite)) max(log_unnormalised[finite]) else 0
  weight <- exp(log_unnormalised - shift)

  grid %>%
    mutate(
      likelihood = exp(log_likelihood),
      posterior = weight / sum(weight),
      data_seen = data_seen
    )
}
# Cumulative samples: element i holds the first i IQ scores.
data_sequence <- lapply(
  seq_along(iqs),
  function(n_seen) iqs[seq_len(n_seen)]
)

# One updated grid per cumulative sample, each computed from `df`.
iq_sequence <- lapply(
  data_sequence,
  function(seen) update_distribution(df, seen)
)

# Put `df` itself first so the sequence includes the zero-data-seen state.
iq_sequence <- c(list(df), iq_sequence)

# Print the total posterior mass of every element of the sequence.
vapply(iq_sequence, function(fit) sum(fit$posterior), numeric(1))
# Build the animation frames. `accumulate(bind_rows)` yields, for each position
# in `iq_sequence`, all fits up to and including that position.
df_fit <- iq_sequence %>%
  purrr::accumulate(bind_rows) %>%
  purrr::map(function(history) {
    history %>%
      # `step` identifies the frame: the largest data_seen in this history.
      # `alpha` fades a fit by how many data points old it is (1 = newest,
      # 0 = five data points old).
      mutate(
        step = max(data_seen),
        alpha = 1 - ((step - data_seen) / 5)
      ) %>%
      # Keep only fits that are at most five data points old.
      filter(step - data_seen <= 5)
  }) %>%
  bind_rows()
df_fit
# Also want a data frame of the cumulative data set as each new point is
# added. This lets us plot a rug of raw data.
#
# Note: this replaces the list `data_sequence` with a data frame that has one
# row per (cumulative sample, observation) and columns `iq`, `data_seen`,
# `step` and `current`. `current` is TRUE only for the last row of each
# `data_seen` group, i.e. the most recently added point.
# `tibble()` is used because `data_frame()` is deprecated.
data_sequence <- data_sequence %>%
  purrr::map_df(~ tibble(iq = .x, data_seen = length(iq))) %>%
  group_by(data_seen) %>%
  mutate(
    step = max(data_seen),
    sequence = seq_along(data_seen),
    current = sequence == max(sequence)
  ) %>%
  # `sequence` was only needed to find the last row of each group.
  select(-sequence) %>%
  ungroup()
data_sequence
library("gganimate")
# Posterior over the candidate means: one line per combination of sd level and
# data_seen value, one animation frame per `step`, line transparency taken
# from the `alpha` column.
p <- ggplot(df_fit, aes(x = each_mean, y = posterior, frame = step)) +
  geom_line(aes(group = interaction(each_sd, data_seen), alpha = alpha)) +
  # Rug of the raw data along the bottom axis; rows flagged `current` are
  # drawn again with a larger size.
  geom_rug(aes(x = iq, y = 0), data = data_sequence, sides = "b") +
  geom_rug(
    aes(x = iq, y = 0),
    data = filter(data_sequence, current),
    size = 1.5,
    sides = "b"
  ) +
  labs(
    title = "Data observed:",
    x = "possible mean",
    y = "posterior probability"
  ) +
  guides(alpha = FALSE) +
  theme_grey(base_size = 14)

# Set the `interval` option of the animation package to 1/10.
animation::ani.options(interval = 1 / 10)

g <- gg_animate(p, ani.height = 400, ani.width = 640)
# g
gg_animate_save(
  g,
  filename = "./assets/updating.gif",
  saver = "gif",
  ani.height = 400,
  ani.width = 640
)
# Posterior over the candidate sds: one line per combination of mean level and
# data_seen value, one animation frame per `step`, line transparency taken
# from the `alpha` column.
p <- ggplot(df_fit, aes(x = each_sd, y = posterior, frame = step)) +
  geom_line(aes(group = interaction(each_mean, data_seen), alpha = alpha)) +
  labs(
    title = "Data observed:",
    x = "possible sd",
    y = "posterior probability"
  ) +
  guides(alpha = FALSE) +
  theme_grey(base_size = 14)

g2 <- gg_animate(p, ani.height = 400, ani.width = 640)
gg_animate_save(
  g2,
  filename = "./assets/updating_sds.gif",
  saver = "gif",
  ani.height = 400,
  ani.width = 640
)
# Contours of the posterior over the mean-sd grid, built from every fit in
# `iq_sequence` stacked together; one animation frame per data_seen value.
p <- ggplot(
  bind_rows(iq_sequence),
  aes(
    x = each_mean,
    y = each_sd,
    z = posterior,
    group = data_seen,
    frame = data_seen
  )
) +
  geom_contour() +
  labs(
    title = "Data observed:",
    x = "possible mean",
    y = "possible sd"
  )

g3 <- gg_animate(p, ani.height = 400, ani.width = 640)
gg_animate_save(
  g3,
  filename = "./assets/updating_contour.gif",
  saver = "gif",
  ani.height = 400,
  ani.width = 640
)
```
![](./assets/updating.gif)
![](./assets/updating_sds.gif)
![](./assets/updating_contour.gif)