-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.R
83 lines (74 loc) · 3.21 KB
/
utils.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# UTILITY FUNCTIONS
# Short fingerprint of the model code.
#
# Hashes every file matching `R/model/*.R` and `stan/*` (resolved with
# `here()`), then hashes the list of per-file hashes and returns the first
# eight characters of that digest.
hash_version <- function() {
    model_files <- Sys.glob(here("R/model/*.R"))
    stan_files <- Sys.glob(here("stan/*"))
    # Keep the per-file hashes in a list: the combined digest depends on the
    # exact object that is hashed.
    file_hashes <- lapply(c(model_files, stan_files), rlang::hash_file)
    combined <- rlang::hash(file_hashes)
    str_sub(combined, 1, 8)
}
# Shrink the sample mean of `y` toward a prior location.
#
# The result is a weighted average of `prior_loc` (weight `prior_nobs`) and
# the observations in `y` (weight 1 each):
#   (prior_nobs * prior_loc + sum(y)) / (prior_nobs + length(y))
#
# y: numeric vector of observations (may be empty).
# prior_loc: prior location the estimate is pulled toward.
# prior_nobs: weight of the prior, expressed as a number of pseudo-observations.
# Returns a single numeric value. With empty `y` this is `prior_loc`
# (provided `prior_nobs` is non-zero).
conj_mean <- function(y, prior_loc=0, prior_nobs=1) {
    n <- length(y)
    # sum(y) is algebraically n * mean(y), but for empty `y` it is 0 whereas
    # mean(y) is NaN, and 0 * NaN would poison the whole result.
    (prior_nobs * prior_loc + sum(y)) / (prior_nobs + n)
}
# Plot `qty` over time with interval ribbons around a 50% reference line.
#
# d: data frame with `date`, `day`, `.lower`, `.upper` and `.width` columns,
#    plus the column named by `qty`.
# qty: unquoted column of `d` plotted on the y-axis.
# election_date: date marked with an "Election Day" vertical line; its year is
#    used as the plot title.
# ylab: y-axis title, passed to scale_y_continuous().
# thin: row thinning; keeps rows where `day %% thin` is 0 when `thin == 1`
#    and 1 otherwise.
# Returns a ggplot object.
plot_time <- function(d, qty, election_date, ylab=NULL, thin=1) {
    library(geomtextpath)

    yr <- strftime(election_date, "%Y")
    thin_offset <- if (thin == 1) 0 else 1

    # Optional "Actual" horizontal line: only for years other than 2022 and
    # only when the plotted column is literally `natl_dem`.
    actual_line <- NULL
    if (yr != "2022" && deparse(substitute(qty)) == "natl_dem") {
        fund <- read_csv(here("data/fundamentals.csv"), progress=FALSE, show_col_types=FALSE)
        yr_idx <- which(as.character(fund$year) == yr)
        # NOTE(review): presumably flips the incumbent-party logit vote to the
        # Democratic side before converting to a share -- confirm.
        actual <- plogis(fund$linc_vote_imp[yr_idx] * (2*fund$dem_pres[yr_idx] - 1))
        actual_line <- geom_texthline(yintercept=actual, label="Actual", linewidth=0.5,
                                      hjust=0.8, vjust=-0.3, size=3, fontface="bold")
    }

    # Optional "Today" vertical line, drawn only before the election.
    today_line <- NULL
    if (Sys.Date() < election_date) {
        today_line <- geom_textvline(xintercept=Sys.Date(), label="Today",
                                     linewidth=0.3, hjust=0.99, vjust=-0.3, size=3, fontface="bold")
    }

    # Base layers: reference line, then the part of each interval below 0.5
    # and the part above 0.5 in separate fills, then the election marker.
    p <- ggplot(filter(d, day %% thin == thin_offset), aes(date, {{ qty }})) +
        geom_hline(yintercept=0.5, lty="dashed") +
        geom_ribbon(aes(ymin=pmin(.lower, 0.5), ymax=pmin(.upper, 0.5),
                        group=.width), fill="#a020103a") +
        geom_ribbon(aes(ymin=pmax(.lower, 0.5), ymax=pmax(.upper, 0.5),
                        group=.width), fill="#1020c03a") +
        geom_textvline(xintercept=election_date, label="Election Day",
                       linewidth=0.3, hjust=0.99, vjust=-0.3, size=3, fontface="bold")

    # Adding NULL to a ggplot is a no-op, so absent optional layers are skipped.
    p <- p + actual_line
    p <- p + today_line

    # Main line; linetype distinguishes dates on/after today from earlier ones.
    p <- p + geom_line(aes(lty=date >= Sys.Date()), lwd=1.2)

    p +
        scale_y_continuous(ylab, labels=scales::percent, breaks=seq(0, 1, 0.01)) +
        scale_x_date(NULL, date_breaks="1 month", date_labels="%B",
                     minor_breaks=election_date - seq(-14, 300, 7),
                     expand=expansion(mult=c(0, 0.05))) +
        guides(lty="none") +
        labs(title=yr) +
        theme_bw()
}
# Midpoint between `d1` and `d2`, returned as a date.
#
# Builds a lubridate interval from `d1` to `d2`, adds half of its
# start-to-end difference to the start, and converts the result with
# lubridate::as_date().
date_midpt <- function(d1, d2) {
    span <- lubridate::interval(d1, d2)
    t0 <- lubridate::int_start(span)
    half_span <- (lubridate::int_end(span) - t0) / 2
    lubridate::as_date(t0 + half_span)
}
# Match each unique name in `xx` to a name in `yy`, asking the user when the
# match is not unambiguous.
#
# Distances are generalized edit distances (insert/delete cost 1, substitute
# cost 20) between lower-cased, whitespace-squished names, computed both on
# the full names and on the names with generic words removed; the smaller of
# the two is used. A name with exactly one zero-distance candidate is matched
# automatically. Otherwise the 16 closest candidates are offered through
# select.list(); if nothing is chosen the name is left mapped to itself.
#
# xx: character vector of names to match.
# yy: character vector of candidate names.
# Returns a character vector with one entry per unique `xx`, named by the
# original `xx` value.
match_manual <- function(xx, yy) {
    gen_regex <- " (the|surveys?|data|co\\.?|company|inc\\.?|panel|university|college) "
    edit_costs <- list(ins=1, del=1, subst=20)

    # Lower-case and collapse whitespace.
    normalize <- function(s) str_squish(str_to_lower(s))
    # Same, but first drop generic words; padding with spaces lets the regex
    # match words at the start and end of the name.
    strip_generic <- function(s) {
        str_squish(str_remove_all(str_to_lower(str_c(" ", s, " ")), gen_regex))
    }

    xx <- unique(xx)
    yy <- unique(yy)
    xx_short <- strip_generic(xx)
    yy_short <- strip_generic(yy)

    dists <- adist(normalize(xx), normalize(yy), costs=edit_costs)
    dists_short <- adist(xx_short, yy_short, costs=edit_costs)
    # Take the stripped-name distance wherever it is strictly smaller.
    closer <- dists_short < dists
    dists[closer] <- dists_short[closer]

    out <- setNames(xx, xx)
    for (i in seq_along(xx)) {
        cand_dist <- dists[i, ]
        if (sum(cand_dist == 0) != 1) {
            # Zero or several exact matches: let the user pick among the
            # 16 nearest candidates.
            nearest <- yy[head(order(cand_dist), 16)]
            picked <- select.list(nearest, title=xx[i])
            if (picked != "") {
                out[i] <- picked
            }
            next
        }
        out[i] <- yy[which(cand_dist == 0)]
    }
    out
}