-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlab7.R
156 lines (106 loc) · 4.76 KB
/
lab7.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
################################
### Simple Linear Regression ###
################################
# Set WD
setwd("/Users/akuelz/Desktop/315/Spr24/Lab/data")
# Check to see if package is installed, install if not
if (!require("car")) install.packages("car")
# Load packages
library(psych)
library(rties)
library(brms)
library(bayesplot)
library(bayestestR)
library(car)
# Get rid of the pesky scientific notation
options(scipen = 999)
# Read in Data
d0 <- read.csv("LAF_labV06_subsetV02.csv", stringsAsFactors = T)
d0<-LAF_labV06_subsetV02
## Premise for today: Identify variables that are meaningful predictors of relationship conflict.
## Our goal is to find meaningful predictors that impact relational conflict.
str(d0)
# Let's focus on 2 potential predictors:
# Level of relationship stress experienced in the last 7 days
# Level of general stress experienced in the last 7 days (i.e., stress caused by external factors)
d1 <- subset(d0, select = c(conflict, genstressA, relstressA))
############### Visualize and Explore Data ##################
describe(d1, IQR = T, quant = c(.25, .75))
histAll(d1)
# Visualize potential associations (checking for linearity)
plot(conflict ~ genstressA, data = d1)
abline(glm(conflict ~ genstressA, data = d1, family = "gaussian"))
plot(conflict ~ relstressA, data = d1)
abline(glm(conflict ~ relstressA, data = d1, family = "gaussian"))
# All together here
pairs.panels(d1, lm = T)
#### Practice Centering: Technically a value of '0' is meaningful for both predictors (reflects no stress in the past 7 days...oh to be them)
# We're going to center around the mean to show as an example. Now the 0 for each predictor will reflect the sample average for that predictor
d1$relstressA_c <- d1$relstressA - mean(d1$relstressA, na.rm = T)
summary(d1$relstressA)
summary(d1$relstressA_c)
d1$genstressA_c <- d1$genstressA - mean(d1$genstressA, na.rm = T)
############### NHST Version #################
m1 <- glm(conflict ~ relstressA_c, data = d1,
family = "gaussian", na.action = na.omit)
######### First, let's check model assumptions:
### 1. Normality of Residuals (test statistics and p-values rely on this):
hist(resid(m1), breaks = 10)
summary(resid(m1))
qqPlot(resid(m1))
### 2. Homoscedasticity (Constant Variance):
# A violation of the constant variance assumption results in inaccurate confidence intervals
# and p-values, even in large samples, although regression coefficient estimates will still be unbiased
plot(resid(m1) ~ relstressA, data = d1)
abline(h = 0)
### 3. Linearity
# We've already checked this prior when exploring the data
### 4. Independence
# Assumption will always be met with the data in this course.
#### Our goal is to be as transparent as possible and contribute to replicable science.
# So, what would we say about the residuals in terms of
# normality around 0?
# constant variance?
#################### Finally ready to interpret our findings!
summary(m1)
# Intercept: b = 2.01, SE = 0.07, p < .001
# Slope of Relationship Stress: b = 1.29, SE = 0.13, p < .001
# Disperion parameter (residual/unexplained/leftover variance): This is all the remaining reasons we haven't explained yet
###### Confidence Intervals
confint.lm(m1)
###### Model Effect Size (R-squared)
r_sq <- function(model_name) {
r2 <- 1 - model_name$deviance/model_name$null.deviance
return(r2)
}
r_sq(m1) # Interpret this
############ Bayesian Version ##############
b1 <- brm(conflict ~ 0 + Intercept + relstressA_c, family = "gaussian",
chains = 4, iter = 2000, seed = 123, data = d1)
### First let's acknowledge our ignorance around the priors:
prior_summary(b1)
### Now let's check for evidence of convergence:
plot(b1) # looking for fuzzy caterpillars in the trace plots
summary(b1) # with special focus on the ESS and Rhat values
### Check Model Assumptions (including accuracy of specified likelihood distribution)
pp_check(b1, ndraws = 100) # how accurate do the draws based on the normal distribution appear?
pp_check(b1, type = "error_hist", ndraws = 12, set.seed(456)) # check normality of residuals around 0
pp_check(b1, type = "error_scatter_avg_vs_x", x = "relstressA_c", ndraws = 20, set.seed(456)) #check for constant variance
#################### Finally ready to interpret our findings!
summary(b1)
plot(b1)
# Intercept: b = 2.01, SD = 0.07, HDI (1.87, 2.15)
# Slope: b = 1.29, SD = 0.13, HDI (1.03, 1.56)
# Sigma = Residual/Leftover/Unaccounted for Variance in SD units
###### Model Effect Size (R-squared)
bayes_R2(b1)
##### Probability of Direction
pd <- p_direction(b1)
pd
plot(pd)
##### Region of Practical Equivalence
r <- rope(b1)
r # by default the rope range is calculated as 0 +- .01 * SD of Y
plot(r)
##### Most of the relevant output in one place:
describe_posterior(b1, dispersion = T, centrality = "mean")