-
Notifications
You must be signed in to change notification settings - Fork 0
/
7_determinants_BP.R
197 lines (138 loc) · 7.9 KB
/
7_determinants_BP.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
##### Dissertation Maria Reyes Retana -
# This code runs the regressions to study the determinants of female bargaining power.
##### Libraries #####
library(tidyverse)
library(readxl)
library(xlsx)
library(lubridate)
library(plm)
library(lmtest)
library(gmm)
library(momentfit)
library(skimr)
library(stargazer)
library(gtsummary)
library(flextable)
library(scales)
##### Read data #####
source('5_complete_data.R')
##### Generates data panel #####
# created data base for women
p_data_women <- pdata.frame(mom_base, index = c("pid_link_mom", "year")) %>%
mutate(income_c_mom = ifelse(worked_12_mom ==0 & is.na(income_c_mom), 0, income_c_mom),
log_income_mom = case_when(income_c_mom > 0 ~ log(income_c_mom),
TRUE ~ income_c_mom)) %>%
# only married women
filter(married_mom ==1) %>%
# only non empty values for the index
filter(!is.na(PC1money_mom))
##### Financial model ----
# financial index POLS
fin_mod1 <- plm(PC1money_mom ~ age_mom + c(age_mom*age_mom) + edu_mom + HH_mom + test_score_mom + log_income_mom,
data = p_data_women, model = "pooling")
# Financial index FE
fin_mod2 <- plm(PC1money_mom ~ age_mom + c(age_mom*age_mom) + edu_mom + HH_mom + test_score_mom + log_income_mom,
data = p_data_women, model = "within", effect = "time")
fin_mod22_randomeffects <- plm(PC1money_mom~ age_mom + c(age_mom*age_mom) + edu_mom + HH_mom + test_score_mom
+ log_income_mom,
data = p_data_women, model = "random", effect = "time", random.method = "amemiya")
# 1. Hausman Test
hausman_test <- phtest(fin_mod2, fin_mod22_randomeffects)
print(hausman_test)
## #Here, the p-value is 0.9755, which is greater than the typical significance level of 0.05.
#Therefore, we fail to reject the null hypothesis. This means there's insufficient evidence to suggest that
#the random effects model is inconsistent.
#So, based on this test, it would be more appropriate to use the random effects model over the fixed effects model.
# 2. Breusch-Pagan test
bp_test <- plmtest(fin_mod22_randomeffects, type=c("bp"))
print(bp_test)
# The null hypothesis of this test is that variances across entities (like individuals, firms, etc.) are zero,
#which means that there are no random effects. In this context, rejecting the null hypothesis would provide evidence
#in favor of a random effects model.
#In this case, the p-value is extremely small (0.00004711), which is well below the common significance level of 0.05.
#This means that we would reject the null hypothesis and conclude that there are significant random effects in the model.
# 3. Breusch-Godfrey/Wooldridge
bg_test <- pbgtest(fin_mod22_randomeffects)
# chi-squared statistic of 51.099 with a p-value extremely close to zero (0.0000000000008783).
#This p-value is well below conventional significance levels (such as 0.05 or 0.01), so we would reject the null hypothesis.
# for this reason we use robust standard errors
# conclusion: we will use the random effects model with robust
# Children model -----
# first old estimations
ch_mod1 <- plm(PC1ch_new_mom ~ age_mom + c(age_mom*age_mom) + edu_mom + HH_mom + test_score_mom
+ log_income_mom,
data = p_data_women, model = "pooling")
ch_mod2<- plm(PC1ch_new_mom ~ age_mom + c(age_mom*age_mom) + edu_mom + HH_mom + test_score_mom
+ log_income_mom,
data = p_data_women, model = "within", effect = "time")
ch_mod2_randomeffects <- plm(PC1ch_new_mom ~ age_mom + c(age_mom*age_mom) + edu_mom + HH_mom + test_score_mom
+ log_income_mom,
data = p_data_women, model = "random", effect = "time", random.method = "amemiya")
# estimations with edits by Andy
# 1. Test FE time versus pooled OLS: p-value is 0.72 cannot reject OLS in favour of Time FE
pFtest(ch_mod2, ch_mod1)
# 2. Hausman
hausman_test_ch <- phtest(ch_mod2, ch_mod2_randomeffects)
print(hausman_test_ch)
#the Hausman test yields a p-value of 0.9978, which is above the commonly used significance level thresholds.
#Hence, we fail to reject the null hypothesis.
#This suggests that the random effects model is consistent and efficient, and it would be appropriate.
# 3. Breusch-Pagan test
bp_test_ch <- plmtest(ch_mod2_randomeffects, type=c("bp"))
print(bp_test_ch)
# There is presence of random effects.
bg_test_ch <- pbgtest(ch_mod2_randomeffects)
print(bg_test_ch)
# Presence of serial correlation, therefore we need to use
# robust standard errors.
# conclusion the most appropiate for this is random with robust standard erros
##### Save results -----
# generate clustered standard errors in a list v
fin_rob_se <- list(
sqrt(diag(vcovHC(ch_mod2_randomeffects, type = "HC1"))),
sqrt(diag(vcovHC(fin_mod22_randomeffects, type = "HC1")))
)
stargazer(ch_mod2_randomeffects, fin_mod22_randomeffects,
digits = 3, header = FALSE, type = "html", dep.var.labels=c("Children bargaining index", "Financial bargaining index"),
covariate.labels=c("Age", "Age squared", "Education level", "Household head", "Test score",
"Log income"),
no.space = TRUE, align = TRUE,
se = fin_rob_se, model.numbers = TRUE, column.labels = c("Random Effects", "Random Effects"),
add.lines = list(c('Time effects', 'Yes','Yes'),
c('Hausman test p value','0.998', '0.976')),
column.sep.width = "50pt", # Well... you can tweak this,
notes = c("Source: MXFLS", "Robust standard errors in parenthesis."),
# omit.stat = c("f"),
out = "Outputs/models/mom_models_final.htm")
##### Runs regressions: robust check PC1 ######
fin_mod1_rob <- plm(PC2tot_mom ~ age_mom + c(age_mom*age_mom) + edu_mom + worked_12_mom + HH_mom + test_score_mom + dummy_div + log_income_mom,
data = p_data_women, model = "pooling")
fin_mod2_rob <- plm(PC2tot_mom ~ age_mom + c(age_mom*age_mom) + edu_mom + worked_12_mom + HH_mom + test_score_mom
+ dummy_div + log_income_mom,
data = p_data_women, model = "within", effect = "time")
# tests: pooled or time fe?
# 1. Test FE time versus pooled OLS: p-value is 0.2316 i cannot reject OLS
pFtest(fin_mod2_rob, fin_mod1_rob)
ch_mod1_rob <- plm(PC1tot_mom ~ age_mom + c(age_mom*age_mom)+ edu_mom + worked_12_mom + HH_mom + test_score_mom
+ dummy_div + log_income_mom,
data = p_data_women, model = "pooling")
ch_mod2_rob <- plm(PC1tot_mom ~ age_mom + c(age_mom*age_mom) + edu_mom + worked_12_mom + HH_mom + test_score_mom
+ dummy_div + log_income_mom,
data = p_data_women, model = "within", effect = "time")
# 1. Test FE time versus pooled OLS: p-value is 0.00003211 ha is significant effects, we reject OLS in favour of Time FE
pFtest(ch_mod2_rob, ch_mod1_rob)
# generate clustered standard errors in a list v
fin_rob_se_rob <- list(sqrt(diag(vcovHC(ch_mod1_rob, type = "HC1"))),
sqrt(diag(vcovHC(ch_mod2_rob, type = "HC1"))),
sqrt(diag(vcovHC(fin_mod1_rob, type = "HC1"))),
sqrt(diag(vcovHC(fin_mod2_rob, type = "HC1"))))
stargazer(ch_mod1_rob, ch_mod2_rob, fin_mod1_rob, fin_mod2_rob,
digits = 3, header = FALSE, type = "html", dep.var.labels=c("PC1", "PC2"),
covariate.labels=c("Age", "Age squared", "Education level", "Household head", "Test score",
"Divorced", "Income"),
no.space = TRUE, align = TRUE,
se = fin_rob_se_rob, model.numbers = FALSE, column.labels = c(" POLS (1)", "POLS (2)", " POLS (3)", " POLS (4)"),
add.lines = list(c('Time effects', 'No', 'Yes', 'No', 'Yes')),
column.sep.width = "5pt", # Well... you can tweak this,
# omit.stat = c("f"),
out = "Outputs/robustness_checks/mom_models_rob.htm")