forked from vikjam/mostly-harmless-replication
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Table 3-3-2.jl
71 lines (60 loc) · 2.65 KB
/
Table 3-3-2.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Load packages
using DataFrames
using FileIO, StatFiles
using Statistics
using GLM
# Download the data
download("http://economics.mit.edu/files/3828", "nswre74.dta")
download("http://economics.mit.edu/files/3824", "cps1re74.dta")
download("http://economics.mit.edu/files/3825", "cps3re74.dta")
# Read the Stata files into Julia
nswre74 = DataFrame(load("nswre74.dta"))
cps1re74 = DataFrame(load("cps1re74.dta"))
cps3re74 = DataFrame(load("cps3re74.dta"))
summary_vars = [:age, :ed, :black, :hisp, :nodeg, :married, :re74, :re75]
nswre74_stat = combine(nswre74, summary_vars .=> mean)
# Calculate propensity scores
probit = glm(@formula(treat ~ age + age2 + ed + black + hisp +
nodeg + married + re74 + re75),
cps1re74,
Binomial(),
ProbitLink())
cps1re74.pscore = predict(probit)
probit = glm(@formula(treat ~ age + age2 + ed + black + hisp +
nodeg + married + re74 + re75),
cps3re74,
Binomial(),
ProbitLink())
cps3re74.pscore = predict(probit)
# Create function to summarize data
function summarize(data, condition)
stats = combine(data[condition, :], summary_vars .=> mean)
stats.count = [size(data[condition, summary_vars])[1]]
return(stats)
end
# Summarize data
nswre74_treat_stats = summarize(nswre74, nswre74.treat .== 1)
nswre74_control_stats = summarize(nswre74, nswre74.treat .== 0)
cps1re74_control_stats = summarize(cps1re74, cps1re74.treat .== 0)
cps3re74_control_stats = summarize(cps3re74, cps3re74.treat .== 0)
cps1re74_ptrim_stats = summarize(cps1re74, broadcast(&, cps1re74.treat .== 0,
cps1re74.pscore .> 0.1,
cps1re74.pscore .< 0.9))
cps3re74_ptrim_stats = summarize(cps3re74, broadcast(&, cps3re74.treat .== 0,
cps3re74.pscore .> 0.1,
cps3re74.pscore .< 0.9))
# Combine summary stats, add header and print to markdown
table = vcat(nswre74_treat_stats,
nswre74_control_stats,
cps1re74_control_stats,
cps3re74_control_stats,
cps1re74_ptrim_stats,
cps3re74_ptrim_stats)
table.id = 1:size(table, 1)
table = stack(table, [:age_mean, :ed_mean, :black_mean, :hisp_mean,
:nodeg_mean, :married_mean, :re74_mean, :re75_mean, :count])
table = unstack(table, :variable, :id, :value)
rename!(table, [:Variable, :NSWTreat, :NSWControl,
:FullCPS1, :FullCPS3, :PscoreCPS1, :PscoreCPS3])
println(table)
# End of script