-
Notifications
You must be signed in to change notification settings - Fork 0
/
18.R_Mar7_LogitProbit.Rmd
358 lines (297 loc) · 18.6 KB
/
18.R_Mar7_LogitProbit.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
---
title: "Fixed Effects, Logit, and Probit"
author: "Lauren K. Perez"
date: "3/7/2019"
output: pdf_document
---
```{r setup, include=FALSE}
# Set the default chunk option echo = TRUE so the R source of each chunk is
# shown in the knitted document alongside its output.
knitr::opts_chunk$set(echo = TRUE)
```
# Open and clean the College Scorecard data
The variables you will need for this assignment are: the selective dummy variable that we have made before (if admission rate is less than .3), the private non-profit dummy we have created (value 2 of `control`), the private for-profit dummy we have created (value 3 of `control`), SAT average, admission rate, and median family income (`md_faminc`). You should also create an "expensive" dummy variable that takes a value of 1 for schools that are in the top 10% by cost, in other words when `costt4_a` is greater than $43,573.
```{r}
library(haven)
# Read the College Scorecard extract (Stata .dta file) into `data2`.
# NOTE(review): this is an absolute, user-specific path, so the document will
# only knit on a machine that has the file at exactly this location; consider
# a path relative to the .Rmd file.
data2<-read_dta("/Users/kevinxyliu/Desktop/CollegeScorecard1415_forR.dta")
# clean up the data
# The strings "NULL" and "PrivacySuppressed" are treated as missing: every
# occurrence in the columns listed below is replaced by NA. This is done in
# one loop instead of one hand-written statement per column and code; the
# copy-pasted version compared `hcm2` against the misspelled code
# "PrivacySuppressedL" (so that column was never cleaned), handled `ugds`
# twice, and skipped `tuitfte`, which is converted to numeric later on.
missing_codes <- c("NULL", "PrivacySuppressed")
cols_to_clean <- c(
  "opeid6", "instnm", "city", "stabbr", "accredagency", "hcm2", "main",
  "numbranch", "preddeg", "highdeg", "control", "menonly", "womenonly",
  "adm_rate", "relaffil", "actcm25", "actcm75", "sat_avg",
  "ugds", "ugds_white", "ugds_black", "ugds_hisp", "ugds_asian",
  "npt4_pub", "npt4_priv", "costt4_a", "tuitionfee_in", "tuitionfee_out",
  "tuitfte", "inexpfte", "avgfacsal", "pctpell",
  "c200_4", "c200_l4", "ret_ft4", "ret_ftl4", "ret_pt4", "ret_ptl4",
  "pctfloan", "ug25abv", "cdr3",
  "rpy_1yr_rt", "female_rpy_1yr_rt", "male_rpy_1yr_rt",
  "rpy_3yr_rt", "female_rpy_3yr_rt", "male_rpy_3yr_rt",
  "rpy_5yr_rt", "female_rpy_5yr_rt", "male_rpy_5yr_rt",
  "inc_pct_lo", "par_ed_pct_1stgen",
  "inc_pct_m1", "inc_pct_m2", "inc_pct_h1", "inc_pct_h2",
  "debt_mdn", "grad_debt_mdn", "wdraw_debt_mdn",
  "faminc", "md_faminc", "faminc_ind",
  "mn_earn_wne_p10", "md_earn_wne_p10", "gt_25k_p10",
  "c100_4", "c100_l4", "ugds_men", "ugds_women"
)
for (col in cols_to_clean) {
  # %in% never returns NA, so values that are already NA are left untouched.
  data2[[col]][data2[[col]] %in% missing_codes] <- NA
}
# Variable-specific codes that are also recoded to NA.
data2$relaffil[which(data2$relaffil == "-2")] <- NA
data2$preddeg[which(data2$preddeg == "0")] <- NA
# convert data into numerics
# Each column named below is coerced with as.numeric(); columns that are not
# listed (for example instnm, city, stabbr, accredagency) are left unchanged.
numeric_cols <- c(
  "opeid6", "hcm2", "main", "numbranch", "preddeg", "highdeg", "control",
  "menonly", "womenonly", "relaffil", "adm_rate", "actcm25", "actcm75",
  "sat_avg", "ugds", "ugds_white", "ugds_black", "ugds_hisp", "ugds_asian",
  "npt4_pub", "npt4_priv", "costt4_a", "tuitionfee_in", "tuitionfee_out",
  "tuitfte", "inexpfte", "avgfacsal", "pctpell",
  "c200_4", "c200_l4", "ret_ft4", "ret_ftl4", "ret_pt4", "ret_ptl4",
  "pctfloan", "ug25abv", "cdr3",
  "rpy_1yr_rt", "female_rpy_1yr_rt", "male_rpy_1yr_rt",
  "rpy_3yr_rt", "female_rpy_3yr_rt", "male_rpy_3yr_rt",
  "rpy_5yr_rt", "female_rpy_5yr_rt", "male_rpy_5yr_rt",
  "inc_pct_lo", "par_ed_pct_1stgen",
  "inc_pct_m1", "inc_pct_m2", "inc_pct_h1", "inc_pct_h2",
  "debt_mdn", "grad_debt_mdn", "wdraw_debt_mdn",
  "faminc", "md_faminc", "faminc_ind",
  "mn_earn_wne_p10", "md_earn_wne_p10", "gt_25k_p10",
  "c100_4", "c100_l4", "ugds_men", "ugds_women"
)
for (col in numeric_cols) {
  data2[[col]] <- as.numeric(data2[[col]])
}
# create new variables
# selective: 1 when the admission rate is below 0.3, 0 otherwise. The
# comparison yields NA when adm_rate is NA, so missing rates stay missing.
data2$selective <- as.numeric(data2$adm_rate < 0.3)
# Admission rate rescaled from a proportion to a percentage.
data2$adm_rate_pct <- 100 * data2$adm_rate
# Ownership dummies built from `control`, which is numeric at this point, so
# it is compared against numbers rather than strings
# (1 = public, 2 = private non-profit, 3 = private for-profit).
data2$public <- as.numeric(data2$control == 1)
data2$private_non_profit <- as.numeric(data2$control == 2)
data2$private_for_profit <- as.numeric(data2$control == 3)
# expensive: 1 when cost (costt4_a) is above $43,573, the cutoff the
# assignment gives for the top 10% of schools by cost; NA when cost is missing.
data2$expensive <- as.numeric(data2$costt4_a > 43573)
```
# Fixed Effects
*Start by running a regular, linear model (without fixed effects) that uses the private non-profit dummy, the public dummy, SAT average, and median family income to predict admission rate.*
```{r}
# Baseline OLS model of the admission rate (in percent) without fixed effects.
# The ownership dummies are already created in the data-preparation chunk
# above, so they are not rebuilt here.
mod <- lm(
  adm_rate_pct ~ private_non_profit + public + sat_avg + md_faminc,
  data = data2
)
summary(mod)
```
*Now let's try running that model but adding fixed effects by state. Run the same model as above, but add `as.factor(stabbr)` as the final variable in your model. For example, the model will end with `+ md_faminc + as.factor(stabbr), data=CSdata)`.*
```{r}
# OLS model of the admission rate (in percent) with state fixed effects:
# as.factor(stabbr) adds one dummy per state, minus a reference category.
mod2 <- lm(
  adm_rate_pct ~ private_non_profit + public + sat_avg + md_faminc +
    as.factor(stabbr),
  data = data2
)
summary(mod2)
```
*Make a stargazer table with these two models side by side. It may be easier to make this table `type="text"` since the fixed effects output will not automatically fit on one page of the PDF output.*
*What differences in terms of sign, significance, and coefficient size do you notice among the main variables in the model (i.e., those that are not fixed effects)?*
*Which state did R leave out as the reference category?*
*Which states are significantly above or below the reference state?*
*Why might we want to include fixed effects by state?*
*What differences do you notice in terms of the goodness-of-fit statistics at the bottom of the table?*
As I mentioned above, including fixed effects in a stargazer table can make it very difficult to fit on one page, and we often do not care too much about the individual fixed effects (rather, we care about controlling for them and seeing how that impacts the main variables). You can tell R to leave the fixed effects out of the stargazer table by adding the omit option. Unfortunately, the way we write this is a bit odd. You'll want to add it like this: `omit="factor\\(stabbr"`.
*Try recreating the stargazer table, but omit the state fixed effects.*
# Logistic (Logit) Regression
**Note: The two variables we will use as dependent variables today, whether a school is selective and whether it is expensive, are both artificially created binary variables. In normal social science research, you would rather keep these as continuous variables, since you are losing information by making them binary and therefore ending up with less precise estimates. However, this data set does not have great variables to serve as binary dependent variables, so we will use these for today.**
*To run a logistic regression, run the code below:*
```{r}
# Logistic regression for the binary `selective` outcome. The data frame and
# dummy names match the objects built earlier in this file (`data2`,
# `private_non_profit`); `CSdata` and `privnonprofit` are never created here
# and would stop the document from knitting.
mylogit1 <- glm(
  selective ~ private_non_profit + public + sat_avg + md_faminc,
  data = data2,
  family = binomial
)
summary(mylogit1)
```
Note that for the most part, this function looks similar to the `lm()` function we have been using with OLS. The function here is `glm()`, which stands for generalized linear model. Then we give the DV first, followed by a tilde and then the independent variables separated by plus signs. You can then tell it what data set to use and tell it that the family of models we are using is the binomial family. The default type of model in this family is logistic regression, so we do not have to specify any further here.
As in linear regression, we see that the output gives us estimates for the coefficients (the betas), their standard errors, test statistics (here it is the z-score rather than the t-statistic) and the p-value. P-values work the same way as in linear regression.
*Which variables are significant? At what levels? Which are not significant?*
The coefficient estimates give us the expected change in the log odds of the dependent variable for a one unit increase in the independent variable.
For example, for each additional point on a school's SAT average, we expect the log odds of the school being selective to increase by .039.
*How would you interpret the coefficients (in terms of log odds) for median family income?*
To get odds ratios, we can exponentiate these coefficients. *Run the code below.*
```{r}
# Exponentiate the logit coefficients (log odds) to obtain odds ratios.
logit_coefs <- coef(mylogit1)
exp(logit_coefs)
```
To interpret these, we can talk about a one unit change in X leading the odds to increase by a factor of ___.
For example, we predict that a one point increase in a school's SAT average will increase the odds of a school being considered selective by a factor of 1.039.
*How would you interpret the coefficients (in terms of odds ratios) for median family income?*
*Now run a logistic regression that uses the same independent variables as the first model, along with admission rate, to predict whether a school is expensive.*
*Which variables are significant?*
*Interpret at least one of the variables in terms of the log odds.*
*Calculate the odds ratios.*
*Interpret at least one of the variables in terms of the odds ratios.*
# Probit
Overall, probit is very similar to logit. Which model you choose to use is one of personal preference.
The function is also very similar, but we have to tell it to use a probit model. (Logit is the default once you tell it that the family is binomial, which is why we did not need to do this above.)
*Run the model below.*
```{r}
# Probit regression for the binary `selective` outcome: same specification as
# a binomial glm, but with the probit link requested explicitly. Uses the
# objects built earlier in this file (`data2`, `private_non_profit`);
# `CSdata` and `privnonprofit` are never created here and would stop the
# document from knitting.
myprobit1 <- glm(
  selective ~ private_non_profit + public + sat_avg + md_faminc,
  data = data2,
  family = binomial(link = "probit")
)
summary(myprobit1)
```
This output should look very similar to what we saw above.
These coefficients can be interpreted as a linear increase in the z-scores.
For example, we predict that for a one point increase in a school's SAT average, the z-score increases by .019.
*How would you interpret the coefficients for median family income?*
*Now try running the same model as above to predict whether a school is expensive.*
*Interpret at least one of the coefficients.*
# Comparing Models
Let's compare these models to each other, as well as to the linear probability model.
*Run a linear probability model for model 1 (in other words, an OLS model using the `lm()` function.)*
*Now produce a stargazer table with the linear probability model, logit model, and probit model.*
*What similarities and differences do you notice in terms of signs and significance? Remember that the sizes of the coefficients are not directly comparable.*
*Now make a stargazer table with the continuous admission rate DV model you did at the top of the R file (without the fixed effects) and the linear probability model. What similarities and differences do you notice in terms of signs and significance?*