# Figure 6-1-2.jl
# Forked from vikjam/mostly-harmless-replication
# Load packages
using DataFrames
using Gadfly
using Compose
using GLM
# Download the data and unzip it
# download("http://economics.mit.edu/faculty/angrist/data1/mhe/lee", "Lee2008.zip")
# run(`unzip Lee2008.zip`)
# Load the individual-level data set
lee = readtable("Lee2008/individ_final.csv")

# Panel (a) sample: rows where both the running variable (difshare) and the
# outcome (myoutcomenext) are observed
has_margin_a  = !isna(lee[:, Symbol("difshare")])
has_outcome_a = !isna(lee[:, Symbol("myoutcomenext")])
panel_a = lee[has_margin_a & has_outcome_a, :]

# Treatment indicator: 1.0 at or above the cut-off (difshare >= 0), else 0.0
running_a = panel_a[:difshare]
panel_a[:d] = (running_a .>= 0) .* 1.0

# Powers of the running variable for the degree-4 polynomial
panel_a[:difshare2] = running_a.^2
panel_a[:difshare3] = running_a.^3
panel_a[:difshare4] = running_a.^4

# Logit of the outcome on the polynomial terms, the indicator, and the
# indicator interacted with each polynomial term
logit = glm(
    myoutcomenext ~ difshare + difshare2 + difshare3 + difshare4 + d +
                    d*difshare + d*difshare2 + d*difshare3 + d*difshare4,
    panel_a,
    Binomial(),
    LogitLink()
)

# Store the model's fitted values next to the data they were estimated on
panel_a[:mmyoutcomenext] = predict(logit)
# Bin the running variable into 0.005-wide intervals spanning [-1, 1]
bin_edges_a = collect(-1:0.005:1)
panel_a[:i005] = cut(panel_a[:difshare], bin_edges_a)

# Average every column within each bin (result columns are read back below
# with a `_mean` suffix)
mean_panel_a = aggregate(panel_a, :i005, [mean])

# Keep only bins whose mean margin lies strictly inside +/- 0.251
bin_margin_a = mean_panel_a[:difshare_mean]
restriction_a = (bin_margin_a .> -0.251) & (bin_margin_a .< 0.251)
mean_panel_a = mean_panel_a[restriction_a, :]
# Panel (a): binned outcome means (points) with the fitted logit curve drawn
# separately on each side of the cut-off, so the line is not joined across 0

# Row masks for bins left of / at-or-right of the cut-off
below_cutoff_a = mean_panel_a[:difshare_mean] .< 0
above_cutoff_a = mean_panel_a[:difshare_mean] .>= 0

# Scatter of the local averages
points_a = layer(x = mean_panel_a[:difshare_mean],
                 y = mean_panel_a[:myoutcomenext_mean],
                 Geom.point)

# Fitted curve, one segment per side
fit_below_a = layer(x = mean_panel_a[below_cutoff_a, :difshare_mean],
                    y = mean_panel_a[below_cutoff_a, :mmyoutcomenext_mean],
                    Geom.line)
fit_above_a = layer(x = mean_panel_a[above_cutoff_a, :difshare_mean],
                    y = mean_panel_a[above_cutoff_a, :mmyoutcomenext_mean],
                    Geom.line)

# Dotted vertical line marking the cut-off at 0
cutoff_a = layer(xintercept = [0],
                 Geom.vline,
                 Theme(line_style = Gadfly.get_stroke_vector(:dot)))

plot_a = plot(points_a,
              fit_below_a,
              fit_above_a,
              cutoff_a,
              Guide.xlabel("Democratic Vote Share Margin of Victory, Election t"),
              Guide.ylabel("Probability of Victory, Election t+1"),
              Guide.title("a"))
# Panel (b) sample: rows where both the running variable (difshare) and
# mofficeexp are observed
has_margin_b = !isna(lee[:, Symbol("difshare")])
has_officeexp_b = !isna(lee[:, Symbol("mofficeexp")])
panel_b = lee[has_margin_b & has_officeexp_b, :]

# Bin the running variable into 0.005-wide intervals spanning [-1, 1] and
# average every column within each bin
bin_edges_b = collect(-1:0.005:1)
panel_b[:i005] = cut(panel_b[:difshare], bin_edges_b)
mean_panel_b = aggregate(panel_b, :i005, [mean])

# Keep only bins whose mean margin lies strictly inside +/- 0.251
bin_margin_b = mean_panel_b[:difshare_mean]
restriction_b = (bin_margin_b .> -0.251) & (bin_margin_b .< 0.251)
mean_panel_b = mean_panel_b[restriction_b, :]
# Panel (b): binned means of mofficeexp (points) with the mpofficeexp curve
# drawn separately on each side of the cut-off.
# NOTE(review): mpofficeexp is never computed in this script, so it is
# presumably a column already present in the CSV -- confirm against the data.

# Row masks for bins left of / at-or-right of the cut-off
below_cutoff_b = mean_panel_b[:difshare_mean] .< 0
above_cutoff_b = mean_panel_b[:difshare_mean] .>= 0

# Scatter of the local averages
points_b = layer(x = mean_panel_b[:difshare_mean],
                 y = mean_panel_b[:mofficeexp_mean],
                 Geom.point)

# Curve, one segment per side
fit_below_b = layer(x = mean_panel_b[below_cutoff_b, :difshare_mean],
                    y = mean_panel_b[below_cutoff_b, :mpofficeexp_mean],
                    Geom.line)
fit_above_b = layer(x = mean_panel_b[above_cutoff_b, :difshare_mean],
                    y = mean_panel_b[above_cutoff_b, :mpofficeexp_mean],
                    Geom.line)

# Dotted vertical line marking the cut-off at 0
cutoff_b = layer(xintercept = [0],
                 Geom.vline,
                 Theme(line_style = Gadfly.get_stroke_vector(:dot)))

plot_b = plot(points_b,
              fit_below_b,
              fit_above_b,
              cutoff_b,
              Guide.xlabel("Democratic Vote Share Margin of Victory, Election t"),
              Guide.ylabel("No. of Past Victories as of Election t"),
              Guide.title("b"))
# Stack panel (a) above panel (b) and write the figure to a 6in x 8in PNG
combined_figure = vstack(plot_a, plot_b)
output_png = PNG("Figure 6-1-2-Julia.png", 6inch, 8inch)
draw(output_png, combined_figure)
# End of script