dv_supplements.Rmd

---
title: "Iterative Supplements by DV"
output: html_document
params:
  dep_var: "diff_field_importance"
---

```{r setup, warning = FALSE, message = FALSE, error = FALSE}
# load libraries
library(brms)
library(tidyverse)
library(ggplot2)
library(tidybayes)
library(dotwhisker)
library(bayesplot)
library(patchwork)
library(broom)

# Load the long- and wide-format data sets, reading the design variables as
# factors. No explicit levels are supplied to col_factor().
# NOTE(review): with no levels given, level order presumably follows the order
# of first appearance in each CSV, which determines which level each
# sum-contrast column refers to -- confirm against the readr documentation.
long_data <- read_csv(
  here::here('cleaned_numeric_data_long.csv'),
  col_types = cols(
    article_type = col_factor(),
    Field = col_factor(),
    Match = col_factor(),
    Order = col_factor(),
    keyword_batch_comp = col_factor()
  )
)
wide_data <- read_csv(
  here::here('cleaned_numeric_data_wide.csv'),
  col_types = cols(
    Field = col_factor(),
    Match = col_factor(),
    Order = col_factor(),
    keyword_batch_comp = col_factor()
  )
)

# Sum-to-zero contrasts for every factor predictor. The hard-coded sizes
# require Field to have exactly 3 levels and every other factor exactly 2.

# wide (difference-score) data
contrasts(wide_data[["Field"]]) <- contr.sum(3)
contrasts(wide_data[["keyword_batch_comp"]]) <- contr.sum(2)
contrasts(wide_data[["Order"]]) <- contr.sum(2)
contrasts(wide_data[["Match"]]) <- contr.sum(2)

# long data (additionally has article_type)
contrasts(long_data[["article_type"]]) <- contr.sum(2)
contrasts(long_data[["Field"]]) <- contr.sum(3)
contrasts(long_data[["keyword_batch_comp"]]) <- contr.sum(2)
contrasts(long_data[["Order"]]) <- contr.sum(2)
contrasts(long_data[["Match"]]) <- contr.sum(2)
```

## Distribution of Outcome Variable

Each bar sits immediately to the left of the value it counts (e.g. the tallest bar, just to the left of the 0 tick mark, is the count of responses equal to 0). Difference scores are calculated as the rating for the Registered Reports article minus the rating for the Matched article.

```{r warning = FALSE, error = FALSE, message = FALSE}
# Histogram of the difference scores for the outcome named in `params$dep_var`,
# with unit-wide bins from -8 to 8 and a tick at every integer.
# `.data[[...]]` looks the column up by its string name; it replaces the
# deprecated `aes_string()`. The x label is set explicitly so the axis still
# shows the bare column name.
ggplot(wide_data, aes(x = .data[[params$dep_var]])) +
  geom_histogram(breaks = seq(-8, 8, by = 1), col = 'blue', fill = 'grey') +
  scale_x_continuous(breaks = seq(-8, 8, by = 1)) +
  labs(x = params$dep_var,
       title = stringr::str_glue("Distribution of difference scores of: {params$dep_var}"))
```

# Deciding between pooled vs. separate models by recruitment batch:

```{r warning = FALSE, message = FALSE, error = FALSE, results = 'hide'}
# Priors shared by every model in this document:
#   class "b"  -> normal(0, 2)
#   class "sd" -> normal(0, 2.5)
priors <- c(
  set_prior("normal(0,2)", class = "b"),
  set_prior("normal(0, 2.5)", class = "sd")
)
```


## Within-Subjects Difference Score Models by 'keyword batch'

### First Batch
```{r warning = FALSE, message = FALSE, error = FALSE, results = 'hide'}
# Difference-score model for batch 1: Gaussian regression of the outcome named
# in `params$dep_var` on Field, Order, Match and the Order x Match interaction,
# with a varying intercept per RR, fitted only to rows where
# keyword_batch_comp == 1. Uses the `priors` object defined earlier in the
# document.
within_model_diffs_keywords1 <- brm(
  formula = as.formula(paste(params$dep_var, "~ Field + Order + Match +
                                                    Order*Match +
                                                    (1|RR)")),
  data = wide_data %>% filter(keyword_batch_comp == 1),
  family = 'gaussian',
  prior = priors,
  chains = 4,
  seed = 3
)
```

```{r warning = FALSE, message = FALSE,error = FALSE}
# Diagnostics for the batch-1 model; each call prints into the report in order.
# Posterior summary of the fitted model.
summary(within_model_diffs_keywords1)
# Posterior predictive check (default plot type).
pp_check(within_model_diffs_keywords1)
# Information criteria: WAIC, then LOO.
WAIC(within_model_diffs_keywords1)
loo(within_model_diffs_keywords1)
```

### Batches 2 + 3

``` {r warning = FALSE, message = FALSE, error = FALSE, results = 'hide'}
# Difference-score model for the rows where keyword_batch_comp == 2: same
# formula, priors and family as the batch-1 model, with a different seed.
within_model_diffs_keywords2 <- brm(
  formula = as.formula(paste(params$dep_var, "~ Field + Order + Match +
                                                    Order*Match +
                                                    (1|RR)")),
  data = wide_data %>% filter(keyword_batch_comp == 2),
  family = 'gaussian',
  prior = priors,
  chains = 4,
  seed = 4
)
```

```{r warning = FALSE, message = FALSE,error = FALSE}
# Diagnostics for the keyword_batch_comp == 2 model; each call prints into the
# report in order.
# Posterior summary of the fitted model.
summary(within_model_diffs_keywords2)
# Posterior predictive check (default plot type).
pp_check(within_model_diffs_keywords2)
# Information criteria: WAIC, then LOO.
WAIC(within_model_diffs_keywords2)
loo(within_model_diffs_keywords2)
```

### graphs of posterior samples
```{r warning = FALSE, message = FALSE,error = FALSE}
# Posterior density plots (bayesplot::mcmc_areas, prob = 0.9) of the
# population-level coefficients of the two batch-specific models, on a common
# x-range of [-2, 2].
#
# `as.matrix()` extracts the posterior draws from the fit; it replaces
# `posterior_samples()`, which is deprecated in brms. The regex is anchored so
# that only parameters whose names START with "b_" are plotted.
# suppressMessages() hides any messages raised while the plot is assembled.
posteriors_keywords1 <- suppressMessages(
  mcmc_areas(as.matrix(within_model_diffs_keywords1),
             regex_pars = "^b_",
             prob = .9) +
    xlim(-2, 2) +
    labs(title = stringr::str_glue('Batch 1 of {params$dep_var}'))
)

posteriors_keywords2 <- suppressMessages(
  mcmc_areas(as.matrix(within_model_diffs_keywords2),
             regex_pars = "^b_",
             prob = .9) +
    xlim(-2, 2) +
    labs(title = stringr::str_glue('Batch 2 of {params$dep_var}'))
)

# Stack the two panels vertically (patchwork `/`), batch 1 on top.
posteriors_keywords1 / posteriors_keywords2
```


## Within-Subjects Difference Score Model (with covariate for batch)

```{r, warning = FALSE, message = FALSE, error = FALSE, results = 'hide'}

# Pooled difference-score model: fitted to all rows of `wide_data`, with
# keyword_batch_comp entered as an additional predictor alongside Field, Order,
# Match and the Order x Match interaction, plus a varying intercept per RR.
# Uses the `priors` object defined earlier in the document.
within_model_diffs <- brm(
  formula = as.formula(paste(params$dep_var, "~ Field + keyword_batch_comp + Order + Match +
                                                    Order*Match +
                                                    (1|RR)")),
  data = wide_data,
  family = 'gaussian',
  prior = priors,
  chains = 4,
  seed = 5
)
```

```{r warning = FALSE, message = FALSE,error = FALSE}
# Diagnostics for the pooled model; each call prints into the report in order.
# Posterior summary of the fitted model.
summary(within_model_diffs)
# Posterior predictive check (default plot type).
pp_check(within_model_diffs)
# Posterior predictive check on a summary statistic: the median.
pp_check(within_model_diffs, type = "stat", stat = 'median')
# Information criteria: WAIC, then LOO.
WAIC(within_model_diffs)
loo(within_model_diffs)
```

## graph comparing estimates and 95% intervals for all 3 models
```{r}
# Dot-and-whisker comparison of the population-level coefficients of the
# pooled model and the two batch-specific models.
#
# Current broom releases have no tidy() method for brmsfit objects, so the
# coefficient table is built directly from brms::fixef(): `estimate` is the
# "Estimate" column and `std.error` the "Est.Error" column. Terms keep the
# "b_" prefix so the plot labels match brms parameter names.
# NOTE(review): dwplot() is given only estimate and std.error and is expected
# to derive the 95% whiskers from them -- confirm against the dotwhisker docs.
list(within_diff_pooled = within_model_diffs,
     batch1 = within_model_diffs_keywords1,
     batch2 = within_model_diffs_keywords2) %>%
  purrr::imap(function(fit, model_label) {
    coefs <- brms::fixef(fit)
    tibble::tibble(term = paste0("b_", rownames(coefs)),
                   estimate = unname(coefs[, "Estimate"]),
                   std.error = unname(coefs[, "Est.Error"]),
                   model = model_label)
  }) %>%
  bind_rows() %>%
  dotwhisker::dwplot() +
  ggtitle(stringr::str_glue('All within models for {params$dep_var}'))
```