diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7c76f39
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.Rhistory
+.Rproj.user
+img
+data
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..215a618
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+# Mind the gender gap: COVID-19 lockdown effects on gender differences in research
+
+Supporting methods and data.
diff --git a/analysis-fig.R b/analysis-fig.R
new file mode 100644
index 0000000..2665c07
--- /dev/null
+++ b/analysis-fig.R
@@ -0,0 +1,72 @@
+# Regenerate the manuscript figures from the objects built in analysis.Rmd.
+# knitr::purl() extracts the notebook's R code into a temporary script; sourcing
+# it creates the df.* data frames plotted below.
+source(knitr::purl("analysis.Rmd", output=tempfile()))
+
+# img/ is listed in .gitignore, so it is missing from a fresh clone and has to
+# exist before the first ggsave() call.
+dir.create("img", showWarnings=FALSE)
+
+# Fig 1: papers per month, one panel per category plus an "(all)" panel
+ggplot(df.plot.papers) +
+  aes(yearmon+2017, n) + facet_wrap(~category, scales="free_y") +
+  geom_col(data=df.plot.papers.all) +
+  geom_smooth(data=df.plot.papers.all) +
+  geom_col() + geom_smooth() +
+  zoo::scale_x_yearmon(format="%Y") +
+  labs(y="Number of papers per month", x=NULL)
+ggsave("img/Fig1.eps", width=190.5, height=100, units="mm", scale=1.6, device=cairo_ps)
+
+# Fig 2: proportion of male authors per month, y axis extended up to 1
+ggplot(df.plot.p_male) +
+  aes(yearmon+2017, p_male) + facet_wrap(~category) +
+  geom_point(data=df.plot.p_male.all) +
+  geom_smooth(method="gam", data=df.plot.p_male.all) +
+  geom_point() + geom_smooth(method="gam") +
+  zoo::scale_x_yearmon(format="%Y") +
+  expand_limits(y=1) +
+  labs(y="Proportion of male authors", x=NULL)
+ggsave("img/Fig2.eps", width=190.5, height=100, units="mm", scale=1.6, device=cairo_ps)
+
+# Fig 3: odds ratios of the model terms; keeps the rows typed "GLMM" plus the
+# rows whose type is NA, the latter drawn in translucent grey (na.value)
+ggplot(filter(df.terms, type=="GLMM" | is.na(type))) +
+  aes(estimate, term, xmin=conf.low, xmax=conf.high, color=type) +
+  facet_grid("fit", scales="free_y", space="free_y") +
+  geom_vline(xintercept=1, color="black", linetype="dashed") +
+  geom_errorbarh(height=0, size=1) + geom_point(size=2) +
+  ggrepel::geom_text_repel(
+    aes(label=p.label), nudge_y=0.3, nudge_x=0.01, segment.size=0, show.legend=FALSE) +
+  scale_x_log10(breaks=c(0.2, 0.5, 1, 2, 5)) +
+  scale_color_manual(values="black", na.value=grDevices::adjustcolor("grey50", 0.5)) +
+  labs(y=NULL, x="Odds ratio", color="Model") +
+  theme(legend.position="none")
+ggsave("img/Fig3.eps", width=132, height=100, units="mm", scale=1.6, device=cairo_ps)
+
+# S1 Fig: category random effects. The target is a PDF, so it needs the Cairo
+# PDF device; cairo_ps would write PostScript into a file named *.pdf.
+ggplot(df.re.category) +
+  aes(estimate, term, xmin=conf.low, xmax=conf.high, color=group) +
+  geom_vline(xintercept=1, color="black", linetype="dashed") +
+  geom_errorbarh(height=0, size=1) + geom_point(size=2) +
+  scale_x_log10(breaks=c(0.2, 0.5, 1, 2, 5)) +
+  scale_color_manual(
+    breaks=c("pos", "neg"), values=c("blue", "red"),
+    na.value=grDevices::adjustcolor("grey50", 0.5)) +
+  labs(y=NULL, x="Odds ratio", title="Random effect: category") +
+  theme(legend.position="none")
+ggsave("img/S1_Fig.pdf", width=132, height=100, units="mm", scale=1.2, device=cairo_pdf)
+
+# S2 Fig: subcategory random effects; also a PDF, hence written with cairo_pdf
+ggplot(df.re.subcategory) +
+  aes(estimate, term, xmin=conf.low, xmax=conf.high, color=group) +
+  facet_grid("facet", scales="free_y", space="free_y") +
+  geom_vline(xintercept=1, color="black", linetype="dashed") +
+  geom_errorbarh(height=0, size=1) + geom_point(size=2) +
+  scale_x_log10(breaks=c(0.2, 0.5, 1, 2, 5)) +
+  scale_color_manual(
+    breaks=c("pos", "neg"), values=c("blue", "red"),
+    na.value=grDevices::adjustcolor("grey50", 0.5)) +
+  labs(y=NULL, x="Odds ratio", title="Random effect: subcategory") +
+  theme(legend.position="none")
+ggsave("img/S2_Fig.pdf", width=132, height=132*4, units="mm", scale=1.4, device=cairo_pdf)
diff --git a/analysis.Rmd b/analysis.Rmd
new file mode 100644
index 0000000..acc0862
--- /dev/null
+++ b/analysis.Rmd
@@ -0,0 +1,401 @@
+---
+title: "Mind the gender gap: COVID-19 lockdown effects on gender differences in research"
+author: "Iñaki Úcar, Margarita Torre, Antonio Elías Fernández"
+output:
+  html_document:
+    df_print: paged
+    number_sections: true
+    toc: true
+    toc_float:
+      collapsed: false
+      smooth_scroll: false
+---
+
+Functions +```{r} +get_file <- function(x) { + dir.create("data", showWarnings=FALSE) + file <- file.path("data", x) + if (!file.exists(file)) download.file( + paste0("https://zenodo.org/record/5142676/files/", x, "?download=1"), file) + file +} + +mutate_cond <- function(.data, condition, ..., envir = parent.frame()) { + require(dplyr) + + condition <- eval(substitute(condition), .data, envir) + .data[condition, ] <- mutate(.data[condition, ], ...) + .data +} + +get_main <- function(repo, data, n=100) { + require(dplyr) + + subcat <- data %>% + filter(repository %in% repo) %>% + group_by(subcategory) %>% + summarise(weight = n(), .groups="drop") %>% + arrange(desc(weight)) %>% + pull(subcategory) + + data %>% + filter(repository %in% repo) %>% + group_by(id) %>% + summarise(main = subcat[subcat %in% subcategory][1], .groups="drop") %>% + group_by(main) %>% + summarise(N = n(), .groups="drop") %>% + arrange(desc(N)) %>% + filter(N >= n) %>% + pull(main) +} + +# performance +model_performance <- function(m) { + require(ggplot2) + + # obs. vs fitted + df <- data.frame(fitted=fitted(m), response=model.response(model.frame(m))) + ggplot(df) + aes(fitted, response) + + geom_point(alpha=0.3) + geom_abline() + geom_smooth(method="lm") + + ggpmisc::stat_poly_eq(formula=y~x, parse=TRUE) +} +``` +
+ +# Data preparation + +## Cleaning and filtering + +```{r, message=FALSE} +library(dplyr) + +articles <- readr::read_csv(get_file("categories.csv")) %>% + left_join(readr::read_csv(get_file("articles.csv")), by="id") %>% + rename(subcategory = subcategory_name) %>% + select(id, repository, category, subcategory, date) %>% + distinct() %>% + collect() %>% + + # define main categories for repo != arXiv + mutate_cond(subcategory %in% c("Epidemiology", "Clinical Trials"), repository = "medrxiv") %>% + mutate_cond(repository == "medrxiv", category = "Health Sciences") %>% + mutate_cond(repository == "biorxiv", category = "Biology") %>% + mutate_cond(repository == "psyarxiv", category = "Psychology") %>% + mutate_cond(repository == "socarxiv", category = "Social Sciences") + +articles %>% + group_by(repository, category) %>% + summarise(subcategories=length(unique(subcategory)), n=n()) +``` + +```{r, message=FALSE} +articles <- articles %>% + # mark lockdown period & move time reference to 0 + mutate(year = lubridate::year(date), month = lubridate::month(date)) %>% + mutate(lockdown = year == 2020 & month > 2) %>% + mutate(yearmon = (year - min(year)) + (month - 1) / 12) %>% + + # adjustments to Biology and Economics + mutate_cond(category == "Quantitative Biology", category = "Biology") %>% + mutate_cond(category == "Quantitative Finance", category = "Economics") %>% + + # adjustments to socarxiv + filter(subcategory != "Social and Behavioral Sciences") %>% + mutate_cond(repository == "socarxiv" & subcategory %in% c( + "Science and Technology Studies", + "Environmental Studies", + "International and Area Studies", + "Leisure Studies", + "Organization Development", + "Leadership Studies" + ), subcategory = "Other Social and Behavioral Sciences") %>% + mutate_cond(repository == "socarxiv" & subcategory %in% c( + "Library and Information Science", + "Linguistics" + ), subcategory = "Arts and Humanities") %>% + mutate_cond(repository == "socarxiv" & subcategory %in% c( + 
"Urban Studies and Planning", + "Social Statistics" + ), subcategory = "Sociology") %>% + mutate_cond(repository == "socarxiv" & subcategory %in% c( + "Social Work", + "Legal Studies" + ), subcategory = "Law") %>% + mutate_cond(repository == "socarxiv" & subcategory %in% c( + "Public Affairs, Public Policy and Public Administration" + ), subcategory = "Political Science") %>% + mutate_cond(repository == "socarxiv" & subcategory %in% c( + "Agricultural and Resource Economics" + ), subcategory = "Economics") %>% + mutate_cond(id %in% filter( + ., repository == "socarxiv" & subcategory == "Psychology")$id, + category = "Psychology") %>% + filter(subcategory != "Psychology") %>% + na.omit() %>% + distinct() %>% + + # get just main categories with more than 100 papers for socarxiv & psyarxiv + filter((!repository %in% c("socarxiv", "psyarxiv")) | subcategory %in% unlist( + lapply(c("socarxiv", "psyarxiv"), get_main, .))) + +articles %>% + group_by(category) %>% + summarise(subcategories=length(unique(subcategory)), n=n()) +``` + +```{r, message=FALSE} +text <- readr::read_csv(get_file("text.csv")) %>% + select(id, title) %>% + collect() %>% + mutate(covidpaper = grepl("covid-19", title, ignore.case=TRUE) | + grepl("sars-cov-2", title, ignore.case=TRUE) | + grepl("coronavirus", title, ignore.case=TRUE)) +``` + +```{r, message=FALSE} +authors <- readr::read_csv(get_file("authors.csv")) %>% + distinct() %>% + collect() %>% + mutate(across(probability, as.numeric)) %>% + mutate(across(alphabetical_ordered, as.logical)) %>% + mutate_cond(is.na(alphabetical_ordered), alphabetical_ordered=TRUE) +``` + +## Feature engineering + +```{r} +merge_info <- function(.data) .data %>% + left_join(articles, by="id") %>% + left_join(text[, c("id", "covidpaper")], by="id") %>% + distinct() %>% + na.omit() %>% + mutate_cond(year < 2020, covidpaper = FALSE) %>% + group_by(subcategory) %>% + filter(min(year) < 2020) %>% + ungroup() + +# features all +df.id <- authors %>% + filter(probability 
>= 0.95 & count >= 10) %>% + group_by(id) %>% + summarise(n_male = sum(gender == "male", na.rm=TRUE), + n_female = sum(gender == "female", na.rm=TRUE), + n_na = sum(is.na(gender)), + total = n_male + n_female, .groups="drop") %>% + merge_info() %>% + # remove papers with more than 25% of authors with missing gender + filter(n_na < 0.25 * (n_na + n_male + n_female)) %>% + # remove observations with less than 30 people per month + group_by(repository, category, subcategory, covidpaper, lockdown, yearmon) %>% + filter(sum(total) > 30) %>% + ungroup() +df.all <- df.id %>% + group_by(repository, category, subcategory, covidpaper, lockdown, yearmon) %>% + summarise(p_male = sum(n_male) / sum(total), + total = sum(total), .groups="drop") + +# features first +df.first <- authors %>% + filter(probability >= 0.95 & count >= 10) %>% + filter(!alphabetical_ordered & rank == "first") %>% + merge_info() %>% + group_by(repository, category, subcategory, covidpaper, lockdown, yearmon) %>% + summarise(n_male = sum(gender == "male", na.rm=TRUE), + n_female = sum(gender == "female", na.rm=TRUE), + total = n_male + n_female, + p_male = n_male / total, .groups="drop") + +# features last +df.last <- authors %>% + filter(probability >= 0.95 & count >= 10) %>% + filter(!alphabetical_ordered & rank == "last") %>% + merge_info() %>% + group_by(repository, category, subcategory, covidpaper, lockdown, yearmon) %>% + summarise(n_male = sum(gender == "male", na.rm=TRUE), + n_female = sum(gender == "female", na.rm=TRUE), + total = n_male + n_female, + p_male = n_male / total, .groups="drop") + +# features single +df.single <- authors %>% + filter(probability >= 0.95 & count >= 10) %>% + filter(rank == "single") %>% + merge_info() %>% + group_by(repository, category, subcategory, covidpaper, lockdown, yearmon) %>% + summarise(n_male = sum(gender == "male", na.rm=TRUE), + n_female = sum(gender == "female", na.rm=TRUE), + total = n_male + n_female, + p_male = n_male / total, .groups="drop") +``` + 
+# Descriptive + +## Number of papers per month + +```{r} +library(ggplot2) +theme_set(theme_bw()) + +df.plot.papers <- count(df.id, yearmon, category) +df.plot.papers.all <- df.plot.papers %>% + group_by(yearmon) %>% + summarise(n = sum(n), .groups="drop") %>% + mutate(category="(all)") + +ggplot(df.plot.papers) + + aes(yearmon+2017, n) + facet_wrap(~category, scales="free_y") + + geom_col(data=df.plot.papers.all) + + geom_smooth(data=df.plot.papers.all) + + geom_col() + geom_smooth() + + zoo::scale_x_yearmon(format="%Y") + + labs(y="Number of papers per month", x=NULL) +``` + +## Number of authors per month + +```{r} +df.plot.authors <- df.id %>% + group_by(yearmon, category) %>% + summarise(n = sum(total), .groups="drop") +df.plot.authors.all <- df.plot.authors %>% + group_by(yearmon) %>% + summarise(n = sum(n), .groups="drop") %>% + mutate(category="(all)") + +ggplot(df.plot.authors) + + aes(yearmon+2017, n) + facet_wrap(~category, scales="free_y") + + geom_col(data=df.plot.authors.all) + + geom_smooth(data=df.plot.authors.all) + + geom_col() + geom_smooth() + + zoo::scale_x_yearmon(format="%Y") + + labs(y="Number of authors per month", x=NULL) +``` + +## Proportion of males per month + +```{r} +df.plot.p_male <- df.id %>% + group_by(yearmon, category) %>% + summarise(p_male = sum(n_male) / sum(total), .groups="drop") +df.plot.p_male.all <- df.id %>% + group_by(yearmon) %>% + summarise(p_male = sum(n_male) / sum(total), .groups="drop") %>% + mutate(category="(all)") + +ggplot(df.plot.p_male) + + aes(yearmon+2017, p_male) + facet_wrap(~category) + + geom_point(data=df.plot.p_male.all) + + geom_smooth(method="gam", data=df.plot.p_male.all) + + geom_point() + geom_smooth(method="gam") + + zoo::scale_x_yearmon(format="%Y") + + labs(y="Proportion of male authors", x=NULL) +``` + +# Modelling + +## Simple model + +```{r} +model.glm <- p_male ~ yearmon + lockdown + covidpaper +fit.glm <- glm(model.glm, df.all, family=binomial, weights=total) +summary(fit.glm) +``` + 
+```{r} +model_performance(fit.glm) +performance::check_model(fit.glm) +``` + +## Hierarchical model + +```{r} +library(lme4) + +model <- p_male ~ yearmon + lockdown + covidpaper + (1 | category/subcategory) +fit.all <- glmer(model, df.all, family=binomial, weights=total) +summary(fit.all) +``` + +```{r} +model_performance(fit.all) +performance::check_model(fit.all) +``` +```{r} +df.re.category <- sjPlot::get_model_data(fit.all, "re")[[2]] %>% + mutate(group = ifelse(conf.low < 1 & conf.high > 1, NA, group)) %>% + mutate(term = forcats::fct_reorder(term, estimate)) + +ggplot(df.re.category) + + aes(estimate, term, xmin=conf.low, xmax=conf.high, color=group) + + geom_vline(xintercept=1, color="black", linetype="dashed") + + geom_errorbarh(height=0, size=1) + geom_point(size=2) + + scale_x_log10(breaks=c(0.2, 0.5, 1, 2, 5)) + + scale_color_manual( + breaks=c("pos", "neg"), values=c("blue", "red"), + na.value=grDevices::adjustcolor("grey50", 0.5)) + + labs(y=NULL, x="Odds ratio", title="Random effect: category") + + theme(legend.position="none") +``` + +```{r, fig.asp=4} +df.re.subcategory <- sjPlot::get_model_data(fit.all, "re")[[1]] %>% + mutate(group = ifelse(conf.low < 1 & conf.high > 1, NA, group)) %>% + mutate(facet = sapply(strsplit(as.character(term), ":"), tail, 1)) %>% + mutate(term = forcats::fct_reorder(term, estimate)) + +ggplot(df.re.subcategory) + + aes(estimate, term, xmin=conf.low, xmax=conf.high, color=group) + + facet_grid("facet", scales="free_y", space="free_y") + + geom_vline(xintercept=1, color="black", linetype="dashed") + + geom_errorbarh(height=0, size=1) + geom_point(size=2) + + scale_x_log10(breaks=c(0.2, 0.5, 1, 2, 5)) + + scale_color_manual( + breaks=c("pos", "neg"), values=c("blue", "red"), + na.value=grDevices::adjustcolor("grey50", 0.5)) + + labs(y=NULL, x="Odds ratio", title="Random effect: subcategory") + + theme(legend.position="none") +``` + +## All together + +```{r} +fit <- list( + glm = fit.glm, + all = fit.all, + first = 
glmer(model, df.first, family=binomial, weights=total), + last = glmer(model, df.last, family=binomial, weights=total), + single = glmer(model, df.single, family=binomial, weights=total) +) + +df.terms <- lapply(fit, sjPlot::get_model_data, "est") %>% + bind_rows(.id="fit") %>% + mutate(type = ifelse(fit=="glm", "GLM", "GLMM")) %>% + mutate(fit = ifelse(fit=="glm", "all", fit)) %>% + mutate(fit = forcats::fct_inorder(fit)) %>% + mutate(type = ifelse(p.value >= 0.05, NA, type)) + +levels(df.terms$term) <- c("COVID paper", "lockdown", "year") +``` + +```{r, results='asis'} +stargazer::stargazer(fit, type="html") +``` + +```{r, fig.asp=1} +ggplot(df.terms) + + aes(estimate, term, xmin=conf.low, xmax=conf.high, color=type) + + facet_grid("fit", scales="free_y", space="free_y") + + geom_vline(xintercept=1, color="black", linetype="dashed") + + geom_errorbarh(height=0, size=1) + geom_point(size=2) + + ggrepel::geom_text_repel( + aes(label=p.label), nudge_y=0.3, nudge_x=0.01, segment.size=0, show.legend=FALSE) + + scale_x_log10(breaks=c(0.2, 0.5, 1, 2, 5)) + + scale_color_manual( + breaks=c("GLM", "GLMM"), values=c("red", "black"), + na.value=grDevices::adjustcolor("grey50", 0.5)) + + labs(y=NULL, x="Odds ratio", color="Model") + + theme(legend.position=c(.99, .99), legend.justification=c(1, 1)) + + theme(legend.background=element_rect(fill="#ffffff88")) +``` diff --git a/analysis.html b/analysis.html new file mode 100644 index 0000000..f1351f1 --- /dev/null +++ b/analysis.html @@ -0,0 +1,3685 @@ + + + + + + + + + + + + + + +Mind the gender gap: COVID-19 lockdown effects on gender differences in research + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+
+
+
+ +
+ + + + + + + +
+

Functions

+
# Return the local path of a data file, downloading it from Zenodo on first use.
#
# x: file name inside Zenodo record 5142676 (e.g. "articles.csv").
#
# Returns file.path("data", x). The "data" directory is created in the working
# directory if needed. Stops with an error when the download fails, and in that
# case leaves nothing at the returned path.
get_file <- function(x) {
  dir.create("data", showWarnings=FALSE)
  file <- file.path("data", x)
  if (!file.exists(file)) {
    url <- paste0("https://zenodo.org/record/5142676/files/", x, "?download=1")
    # Download next to the target and rename only on success: an interrupted
    # transfer must not leave a truncated file that later runs would treat as
    # already cached.
    partial <- paste0(file, ".part")
    on.exit(unlink(partial), add=TRUE)
    # mode="wb" keeps the bytes untouched on every platform
    status <- download.file(url, partial, mode="wb")
    if (status != 0 || !file.rename(partial, file))
      stop("could not download '", x, "' from Zenodo", call.=FALSE)
  }
  file
}
+
# Apply dplyr::mutate() to the rows of .data that satisfy a condition and leave
# every other row untouched.
#
# .data:     data frame or tibble.
# condition: unquoted expression, evaluated with the columns of .data in scope;
#            names that are not columns are looked up in envir.
# ...:       name = value pairs forwarded to dplyr::mutate(). The result is
#            assigned back into the selected rows, so this is meant for
#            overwriting existing columns.
# envir:     environment for non-column names (defaults to the caller's frame).
#
# Returns .data with the selected rows updated.
mutate_cond <- function(.data, condition, ..., envir = parent.frame()) {
  if (!requireNamespace("dplyr", quietly=TRUE))
    stop("package 'dplyr' is required by mutate_cond()", call.=FALSE)

  condition <- eval(substitute(condition), .data, envir)
  # Comparing a missing value yields NA, and NA is not a valid row index for
  # assignment; treat it as "condition not met", as dplyr::filter() does.
  if (is.logical(condition)) condition <- condition & !is.na(condition)
  .data[condition, ] <- dplyr::mutate(.data[condition, ], ...)
  .data
}
+
# Subcategories that are the main subcategory of at least n papers.
#
# Within the rows of data belonging to repo, subcategories are ranked by their
# number of rows. Each paper (id) is attributed to the highest-ranked
# subcategory among the ones it is tagged with, and the subcategories that
# collect at least n papers this way are returned.
#
# repo: repository name(s), matched against data$repository with %in%.
# data: data frame with columns repository, subcategory and id.
# n:    minimum number of papers (inclusive) for a subcategory to be kept.
#
# Returns the retained subcategories, ordered by decreasing number of papers.
get_main <- function(repo, data, n=100) {
  if (!requireNamespace("dplyr", quietly=TRUE))
    stop("package 'dplyr' is required by get_main()", call.=FALSE)

  in_repo <- dplyr::filter(data, repository %in% repo)

  # subcategories of the repository, most frequent first; dplyr::n() is
  # qualified because the argument n would otherwise shadow it for readers
  weights <- dplyr::summarise(
    dplyr::group_by(in_repo, subcategory), weight = dplyr::n(), .groups="drop")
  weights <- dplyr::arrange(weights, dplyr::desc(weight))
  subcat <- dplyr::pull(weights, subcategory)

  # attribute every paper to the most frequent of its subcategories
  main_per_paper <- dplyr::summarise(
    dplyr::group_by(in_repo, id),
    main = subcat[subcat %in% subcategory][1], .groups="drop")

  # number of papers per main subcategory, largest first, thresholded at n
  papers <- dplyr::summarise(
    dplyr::group_by(main_per_paper, main), N = dplyr::n(), .groups="drop")
  papers <- dplyr::arrange(papers, dplyr::desc(N))
  dplyr::pull(dplyr::filter(papers, N >= n), main)
}
+
# performance
# Observed-versus-fitted diagnostic plot for a fitted model.
#
# m: fitted model object supporting fitted() and model.frame().
#
# Returns a ggplot with the response plotted against the fitted values, the
# default geom_abline() reference line, a linear smooth and the annotation
# produced by ggpmisc::stat_poly_eq().
model_performance <- function(m) {
  if (!requireNamespace("ggplot2", quietly=TRUE))
    stop("package 'ggplot2' is required by model_performance()", call.=FALSE)

  # obs. vs fitted
  df <- data.frame(fitted=fitted(m), response=model.response(model.frame(m)))
  ggplot2::ggplot(df) + ggplot2::aes(fitted, response) +
    ggplot2::geom_point(alpha=0.3) + ggplot2::geom_abline() +
    ggplot2::geom_smooth(method="lm") +
    ggpmisc::stat_poly_eq(formula=y~x, parse=TRUE)
}
+
+
+

1 Data preparation

+
+

1.1 Cleaning and filtering

+
library(dplyr)

# Join the category table with the article table by id and keep the distinct
# (id, repository, category, subcategory, date) rows.
articles <- readr::read_csv(get_file("categories.csv")) %>%
  left_join(readr::read_csv(get_file("articles.csv")), by="id") %>%
  select(id, repository, category, subcategory = subcategory_name, date) %>%
  distinct() %>%

  # Epidemiology and Clinical Trials papers are moved to medrxiv first, so
  # that the per-repository categories below apply to them as well
  mutate_cond(
    subcategory %in% c("Epidemiology", "Clinical Trials"),
    repository = "medrxiv") %>%

  # one fixed category for each of these repositories
  mutate_cond(repository == "socarxiv", category = "Social Sciences") %>%
  mutate_cond(repository == "psyarxiv", category = "Psychology") %>%
  mutate_cond(repository == "biorxiv", category = "Biology") %>%
  mutate_cond(repository == "medrxiv", category = "Health Sciences")

# number of subcategories and rows per repository and category
articles %>%
  group_by(repository, category) %>%
  summarise(subcategories=n_distinct(subcategory), n=n())
+
+ +
+
articles <- articles %>%
  # flag the lockdown months (March to December 2020) and express time as
  # fractional years elapsed since January of the first year in the data
  mutate(
    year = lubridate::year(date),
    month = lubridate::month(date),
    lockdown = year == 2020 & month > 2,
    yearmon = (year - min(year)) + (month - 1) / 12
  ) %>%

  # merge the "Quantitative" categories into Economics and Biology
  mutate_cond(category == "Quantitative Finance", category = "Economics") %>%
  mutate_cond(category == "Quantitative Biology", category = "Biology") %>%

  # socarxiv: drop the generic tag, then collapse small subcategories
  filter(subcategory != "Social and Behavioral Sciences") %>%
  mutate_cond(
    repository == "socarxiv" & subcategory %in% c(
      "Science and Technology Studies", "Environmental Studies",
      "International and Area Studies", "Leisure Studies",
      "Organization Development", "Leadership Studies"),
    subcategory = "Other Social and Behavioral Sciences") %>%
  mutate_cond(
    repository == "socarxiv" & subcategory %in% c(
      "Library and Information Science", "Linguistics"),
    subcategory = "Arts and Humanities") %>%
  mutate_cond(
    repository == "socarxiv" & subcategory %in% c(
      "Urban Studies and Planning", "Social Statistics"),
    subcategory = "Sociology") %>%
  mutate_cond(
    repository == "socarxiv" & subcategory %in% c(
      "Social Work", "Legal Studies"),
    subcategory = "Law") %>%
  mutate_cond(
    repository == "socarxiv" & subcategory %in%
      "Public Affairs, Public Policy and Public Administration",
    subcategory = "Political Science") %>%
  mutate_cond(
    repository == "socarxiv" & subcategory %in%
      "Agricultural and Resource Economics",
    subcategory = "Economics") %>%

  # every row of a paper tagged "Psychology" on socarxiv goes to the
  # Psychology category; the "Psychology" subcategory rows are then removed
  mutate_cond(
    id %in% filter(
      ., repository == "socarxiv" & subcategory == "Psychology")$id,
    category = "Psychology") %>%
  filter(subcategory != "Psychology") %>%
  na.omit() %>%
  distinct() %>%

  # socarxiv & psyarxiv: keep only the main subcategories with at least 100
  # papers (get_main thresholds with N >= n)
  filter(
    !(repository %in% c("socarxiv", "psyarxiv")) |
      subcategory %in% unlist(
        lapply(c("socarxiv", "psyarxiv"), get_main, .)))

# number of subcategories and rows per category after the adjustments
articles %>%
  group_by(category) %>%
  summarise(subcategories=n_distinct(subcategory), n=n())
+
+ +
+
# Paper titles, flagged as COVID-related when the title mentions "covid-19",
# "sars-cov-2" or "coronavirus" (case-insensitive).
text <- readr::read_csv(get_file("text.csv")) %>%
  select(id, title) %>%
  mutate(covidpaper = grepl(
    "covid-19|sars-cov-2|coronavirus", title, ignore.case=TRUE))
+
# Author table: distinct rows, with probability coerced to numeric and
# alphabetical_ordered to logical; a missing alphabetical_ordered is set to TRUE.
authors <- readr::read_csv(get_file("authors.csv")) %>%
  distinct() %>%
  mutate(
    probability = as.numeric(probability),
    alphabetical_ordered = as.logical(alphabetical_ordered)
  ) %>%
  mutate_cond(is.na(alphabetical_ordered), alphabetical_ordered=TRUE)
+
+
+

1.2 Feature engineering

+
# Attach the article metadata and the COVID flag to rows keyed by id.
# Incomplete rows are dropped, no paper before 2020 counts as a COVID paper,
# and only subcategories that already had papers before 2020 are kept.
merge_info <- function(.data) {
  .data %>%
    left_join(articles, by="id") %>%
    left_join(select(text, id, covidpaper), by="id") %>%
    distinct() %>%
    na.omit() %>%
    mutate_cond(year < 2020, covidpaper = FALSE) %>%
    group_by(subcategory) %>%
    filter(min(year) < 2020) %>%
    ungroup()
}

# Gender counts and male proportion per
# (repository, category, subcategory, covidpaper, lockdown, yearmon) cell;
# shared by the first / last / single author tables below.
summarise_by_cell <- function(.data) {
  .data %>%
    merge_info() %>%
    group_by(repository, category, subcategory, covidpaper, lockdown, yearmon) %>%
    summarise(
      n_male = sum(gender == "male", na.rm=TRUE),
      n_female = sum(gender == "female", na.rm=TRUE),
      total = n_male + n_female,
      p_male = n_male / total,
      .groups="drop")
}

# features all
# NOTE(review): filter() also drops authors whose probability or count is NA,
# so n_na only counts missing genders among the authors that pass this filter
# -- confirm this is intended.
df.id <- authors %>%
  filter(probability >= 0.95, count >= 10) %>%
  group_by(id) %>%
  summarise(
    n_male = sum(gender == "male", na.rm=TRUE),
    n_female = sum(gender == "female", na.rm=TRUE),
    n_na = sum(is.na(gender)),
    total = n_male + n_female,
    .groups="drop") %>%
  merge_info() %>%
  # keep papers where fewer than 25% of the authors have a missing gender
  filter(n_na < 0.25 * (n_na + total)) %>%
  # keep the cells that gather more than 30 gendered authors
  group_by(repository, category, subcategory, covidpaper, lockdown, yearmon) %>%
  filter(sum(total) > 30) %>%
  ungroup()

# p_male has to be computed before total is overwritten by its sum
df.all <- df.id %>%
  group_by(repository, category, subcategory, covidpaper, lockdown, yearmon) %>%
  summarise(
    p_male = sum(n_male) / sum(total),
    total = sum(total),
    .groups="drop")

# features first
df.first <- authors %>%
  filter(probability >= 0.95, count >= 10) %>%
  filter(!alphabetical_ordered, rank == "first") %>%
  summarise_by_cell()

# features last
df.last <- authors %>%
  filter(probability >= 0.95, count >= 10) %>%
  filter(!alphabetical_ordered, rank == "last") %>%
  summarise_by_cell()

# features single
df.single <- authors %>%
  filter(probability >= 0.95, count >= 10) %>%
  filter(rank == "single") %>%
  summarise_by_cell()
+
+
+
+

2 Descriptive

+
+

2.1 Number of papers per month

+
library(ggplot2)
theme_set(theme_bw())

# number of df.id rows per month and category
df.plot.papers <- df.id %>% count(yearmon, category)

# monthly totals over all categories, shown as an extra "(all)" panel
df.plot.papers.all <- df.plot.papers %>%
  group_by(yearmon) %>%
  summarise(n = sum(n), .groups="drop") %>%
  mutate(category="(all)")

# yearmon counts years from 0, so 2017 is added to get calendar years
ggplot(df.plot.papers, aes(yearmon+2017, n)) +
  facet_wrap(~category, scales="free_y") +
  geom_col(data=df.plot.papers.all) +
  geom_smooth(data=df.plot.papers.all) +
  geom_col() +
  geom_smooth() +
  zoo::scale_x_yearmon(format="%Y") +
  labs(y="Number of papers per month", x=NULL)
+
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
+## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
+

+
+
+

2.2 Number of authors per month

+
# gendered authors (sum of total) per month and category
df.plot.authors <- df.id %>%
  group_by(yearmon, category) %>%
  summarise(n = sum(total), .groups="drop")

# monthly totals over all categories, shown as an extra "(all)" panel
df.plot.authors.all <- df.plot.authors %>%
  group_by(yearmon) %>%
  summarise(n = sum(n), .groups="drop") %>%
  mutate(category="(all)")

ggplot(df.plot.authors, aes(yearmon+2017, n)) +
  facet_wrap(~category, scales="free_y") +
  geom_col(data=df.plot.authors.all) +
  geom_smooth(data=df.plot.authors.all) +
  geom_col() +
  geom_smooth() +
  zoo::scale_x_yearmon(format="%Y") +
  labs(y="Number of authors per month", x=NULL)
+
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
+## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
+

+
+
+

2.3 Proportion of males per month

+
# share of male authors among gendered authors, per month and category
df.plot.p_male <- df.id %>%
  group_by(yearmon, category) %>%
  summarise(p_male = sum(n_male) / sum(total), .groups="drop")

# the same share pooled over all categories, shown as an "(all)" panel
df.plot.p_male.all <- df.id %>%
  group_by(yearmon) %>%
  summarise(p_male = sum(n_male) / sum(total), .groups="drop") %>%
  mutate(category="(all)")

ggplot(df.plot.p_male, aes(yearmon+2017, p_male)) +
  facet_wrap(~category) +
  geom_point(data=df.plot.p_male.all) +
  geom_smooth(method="gam", data=df.plot.p_male.all) +
  geom_point() +
  geom_smooth(method="gam") +
  zoo::scale_x_yearmon(format="%Y") +
  labs(y="Proportion of male authors", x=NULL)
+
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
+## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
+

+
+
+
+

3 Modelling

+
+

3.1 Simple model

+
# Baseline without grouping structure: binomial GLM on the male proportion of
# each cell, weighted by the number of gendered authors in that cell.
model.glm <- p_male ~ yearmon + lockdown + covidpaper
fit.glm <- glm(
  formula=model.glm, family=binomial, data=df.all, weights=total)
summary(fit.glm)
+
## 
+## Call:
+## glm(formula = model.glm, family = binomial, data = df.all, weights = total)
+## 
+## Deviance Residuals: 
+##      Min        1Q    Median        3Q       Max  
+## -26.0616   -1.3754    0.9651    2.5727   16.1094  
+## 
+## Coefficients:
+##                 Estimate Std. Error z value Pr(>|z|)    
+## (Intercept)     1.826402   0.006090 299.901   <2e-16 ***
+## yearmon        -0.104284   0.003042 -34.283   <2e-16 ***
+## lockdownTRUE    0.071570   0.007224   9.908   <2e-16 ***
+## covidpaperTRUE -0.613698   0.019441 -31.568   <2e-16 ***
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+## 
+## (Dispersion parameter for binomial family taken to be 1)
+## 
+##     Null deviance: 64761  on 3367  degrees of freedom
+## Residual deviance: 62173  on 3364  degrees of freedom
+## AIC: 79001
+## 
+## Number of Fisher Scoring iterations: 4
+
model_performance(fit.glm)
+
## `geom_smooth()` using formula 'y ~ x'
+

+
performance::check_model(fit.glm)
+
## Loading required namespace: qqplotr
+

+
+
+

3.2 Hierarchical model

+
library(lme4)
+
## Loading required package: Matrix
+
# Same fixed effects as the GLM, plus random intercepts for category and for
# subcategory nested within category.
model <- p_male ~ yearmon + lockdown + covidpaper + (1 | category/subcategory)
fit.all <- glmer(
  formula=model, data=df.all, family=binomial, weights=total)
summary(fit.all)
+
## Generalized linear mixed model fit by maximum likelihood (Laplace
+##   Approximation) [glmerMod]
+##  Family: binomial  ( logit )
+## Formula: p_male ~ yearmon + lockdown + covidpaper + (1 | category/subcategory)
+##    Data: df.all
+## Weights: total
+## 
+##      AIC      BIC   logLik deviance df.resid 
+##  22667.8  22704.5 -11327.9  22655.8     3362 
+## 
+## Scaled residuals: 
+##     Min      1Q  Median      3Q     Max 
+## -6.8130 -0.7773  0.0898  0.8462  6.2945 
+## 
+## Random effects:
+##  Groups               Name        Variance Std.Dev.
+##  subcategory:category (Intercept) 0.0852   0.2919  
+##  category             (Intercept) 0.3838   0.6196  
+## Number of obs: 3368, groups:  subcategory:category, 192; category, 10
+## 
+## Fixed effects:
+##                 Estimate Std. Error z value Pr(>|z|)    
+## (Intercept)     1.595286   0.199014   8.016 1.09e-15 ***
+## yearmon        -0.049239   0.003202 -15.378  < 2e-16 ***
+## lockdownTRUE    0.030571   0.007491   4.081 4.48e-05 ***
+## covidpaperTRUE  0.076362   0.021791   3.504 0.000458 ***
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+## 
+## Correlation of Fixed Effects:
+##             (Intr) yearmn lcTRUE
+## yearmon     -0.030              
+## lockdwnTRUE  0.011 -0.629       
+## covdpprTRUE -0.005 -0.001 -0.143
+
model_performance(fit.all)
+
## `geom_smooth()` using formula 'y ~ x'
+

+
performance::check_model(fit.all)
+
## Warning in sqrt(crit * n_params * (1 - .hat)/.hat): NaNs produced
+
## Warning in sqrt(cook.levels[.level] * n_params * (1 - .hat80)/.hat80): NaNs
+## produced
+
+## Warning in sqrt(cook.levels[.level] * n_params * (1 - .hat80)/.hat80): NaNs
+## produced
+

+
# Second element of the random-effects data of fit.all. Effects whose interval
# contains 1 get group = NA (drawn in translucent grey); terms are ordered by
# their estimate.
df.re.category <- sjPlot::get_model_data(fit.all, "re")[[2]] %>%
  mutate(
    group = ifelse(conf.low < 1 & conf.high > 1, NA, group),
    term = forcats::fct_reorder(term, estimate)
  )

ggplot(
  df.re.category,
  aes(estimate, term, xmin=conf.low, xmax=conf.high, color=group)) +
  geom_vline(xintercept=1, color="black", linetype="dashed") +
  geom_errorbarh(height=0, size=1) +
  geom_point(size=2) +
  scale_x_log10(breaks=c(0.2, 0.5, 1, 2, 5)) +
  scale_color_manual(
    breaks=c("pos", "neg"), values=c("blue", "red"),
    na.value=grDevices::adjustcolor("grey50", 0.5)) +
  labs(y=NULL, x="Odds ratio", title="Random effect: category") +
  theme(legend.position="none")
+

+
# First element of the random-effects data of fit.all. The facet is the part
# of the term label after the last ":", taken before the terms are reordered
# by estimate; effects whose interval contains 1 get group = NA.
df.re.subcategory <- sjPlot::get_model_data(fit.all, "re")[[1]] %>%
  mutate(
    group = ifelse(conf.low < 1 & conf.high > 1, NA, group),
    facet = sapply(strsplit(as.character(term), ":"), tail, 1),
    term = forcats::fct_reorder(term, estimate)
  )

ggplot(
  df.re.subcategory,
  aes(estimate, term, xmin=conf.low, xmax=conf.high, color=group)) +
  facet_grid("facet", scales="free_y", space="free_y") +
  geom_vline(xintercept=1, color="black", linetype="dashed") +
  geom_errorbarh(height=0, size=1) +
  geom_point(size=2) +
  scale_x_log10(breaks=c(0.2, 0.5, 1, 2, 5)) +
  scale_color_manual(
    breaks=c("pos", "neg"), values=c("blue", "red"),
    na.value=grDevices::adjustcolor("grey50", 0.5)) +
  labs(y=NULL, x="Odds ratio", title="Random effect: subcategory") +
  theme(legend.position="none")
+

+
+
+

3.3 All together

+
# All fitted models: the GLM and GLMM on every author, then the same GLMM on
# first, last and single authors.
fit <- list(
  glm    = fit.glm,
  all    = fit.all,
  first  = glmer(model, df.first, family=binomial, weights=total),
  last   = glmer(model, df.last, family=binomial, weights=total),
  single = glmer(model, df.single, family=binomial, weights=total)
)

# Estimates of every model in one table. The GLM is shown in the "all" panel
# and told apart by type; type is blanked for terms with p >= 0.05.
df.terms <- lapply(fit, sjPlot::get_model_data, "est") %>%
  bind_rows(.id="fit") %>%
  mutate(
    type = ifelse(fit=="glm", "GLM", "GLMM"),
    fit = forcats::fct_inorder(ifelse(fit=="glm", "all", fit)),
    type = ifelse(p.value >= 0.05, NA, type)
  )

# NOTE(review): relabels the levels positionally, so it relies on the level
# order of term being covidpaper, lockdown, yearmon -- confirm.
levels(df.terms$term) <- c("COVID paper", "lockdown", "year")
+
stargazer::stargazer(fit, type="html")
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +Dependent variable: +
+ +
+ +p_male +
+ +logistic + +generalized linear +
+ + + +mixed-effects +
+ +(1) + +(2) + +(3) + +(4) + +(5) +
+
+yearmon + +-0.104*** + +-0.049*** + +-0.044*** + +-0.059*** + +-0.029 +
+ +(0.003) + +(0.003) + +(0.009) + +(0.009) + +(0.018) +
+ + + + + +
+lockdown + +0.072*** + +0.031*** + +0.035* + +0.008 + +0.136*** +
+ +(0.007) + +(0.007) + +(0.020) + +(0.021) + +(0.052) +
+ + + + + +
+covidpaper + +-0.614*** + +0.076*** + +0.399*** + +0.129** + +0.715** +
+ +(0.019) + +(0.022) + +(0.064) + +(0.065) + +(0.298) +
+ + + + + +
+Constant + +1.826*** + +1.595*** + +1.520*** + +1.819*** + +2.480*** +
+ +(0.006) + +(0.199) + +(0.195) + +(0.173) + +(0.169) +
+ + + + + +
+
+Observations + +3,368 + +3,368 + +3,996 + +4,027 + +3,517 +
+Log Likelihood + +-39,496.310 + +-11,327.900 + +-7,182.127 + +-6,955.634 + +-3,788.911 +
+Akaike Inf. Crit. + +79,000.620 + +22,667.800 + +14,376.250 + +13,923.270 + +7,589.823 +
+Bayesian Inf. Crit. + + +22,704.540 + +14,414.010 + +13,961.070 + +7,626.815 +
+
+Note: + +p<0.1; p<0.05; p<0.01 +
+
ggplot(df.terms) +  # one row per term: point estimate plus horizontal interval
+  aes(estimate, term, xmin=conf.low, xmax=conf.high, color=type) +
+  facet_grid("fit", scales="free_y", space="free_y") +  # one panel per value of fit
+  geom_vline(xintercept=1, color="black", linetype="dashed") +  # reference line at 1 on the odds-ratio axis
+  geom_errorbarh(height=0, size=1) + geom_point(size=2) +  # height=0: interval drawn without end caps
+  ggrepel::geom_text_repel(
+    aes(label=p.label), nudge_y=0.3, nudge_x=0.01, segment.size=0, show.legend=FALSE) +  # p.label text next to each point
+  scale_x_log10(breaks=c(0.2, 0.5, 1, 2, 5)) +  # log-scaled x axis
+  scale_color_manual(
+    breaks=c("GLM", "GLMM"), values=c("red", "black"),  # GLM drawn red, GLMM black
+    na.value=grDevices::adjustcolor("grey50", 0.5)) +  # rows whose type is NA: semi-transparent grey
+  labs(y=NULL, x="Odds ratio", color="Model") +
+  theme(legend.position=c(.99, .99), legend.justification=c(1, 1)) +  # legend anchored at the top-right corner
+  theme(legend.background=element_rect(fill="#ffffff88"))  # legend background: white with alpha
+

+
+
+ + + +
+
+ +
+ + + + + + + + + + + + + + + + diff --git a/covid19.Rproj b/covid19.Rproj new file mode 100644 index 0000000..d96a2b3 --- /dev/null +++ b/covid19.Rproj @@ -0,0 +1,15 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +StripTrailingWhitespace: Yes diff --git a/old/ae_load.Rmd b/old/ae_load.Rmd new file mode 100644 index 0000000..6b2208e --- /dev/null +++ b/old/ae_load.Rmd @@ -0,0 +1,97 @@ +--- +title: 'COVID dataset: load the data' +author: "Antonio Elías" +date: "06/07/2020" +output: + html_document: default +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +# 1. Install and load the library RSQLite + +```{r} +#install.packages("RSQLite") +library(RSQLite) +``` + + +# 2. Load the data set + +## 2.1 Create connection to the db +```{r} +conn_table_CONCIERGE <- dbConnect(RSQLite::SQLite(), "database/dataRxiv.db") +``` + +## 2.2 See list of tables +```{r} +dbListTables(conn_table_CONCIERGE) +``` + +We have four tables ARTICLES, AUTHORS, CATEGORIES AND TEXT. + +## 2.3 Get the tables as data.frames + +Table ARTICLES includes: + + - **id**: article identifier. + - **repository**: repository name. + - **date**: first date of publication (YEAR-MONTH-DAY). + +```{r} +articles <- dbGetQuery(conn_table_CONCIERGE, "SELECT * FROM ARTICLES") +head(articles) +``` + +Table AUTHORS includes: + + - **id**: article identifier. + - **full_name**: full name. + - **given_name**: first word that appears at full_name. + - **alphabetical_ordered**: if the full names of the article are alphabetically ordered (TRUE) or (FALSE). + - **position**: author position 1, 2, 3... + - **rank**: first authors, middle author, last author or single author. + - **name**: named used for "genderizing" the author. + - **gender**: predicted gender (male or female). 
+ - **probability**: number of times that the number appears as gender X divided by the number of times the name appears in the genderize data set. + - **count**: number of times that the name appears in the genderize data set. + +```{r} +authors <- dbGetQuery(conn_table_CONCIERGE, "SELECT * FROM AUTHORS") +head(authors) +``` + +Table CATEGORIES includes: + + - **id**: article identifier. + - **category**: first level of category (arxiv). + - **subcategory_code**: code given in arxiv. + - **subcategory_name**: same as code for arxiv (but understandable). For the other repositories this is the subject. + - **primary**: if it is the primary category (for arxiv takes 0 and 1) for other repositories is always 1. + +```{r} +categories <- dbGetQuery(conn_table_CONCIERGE, "SELECT * FROM CATEGORIES") +head(categories) +``` + +Table TEXT includes: + + - **id**: article identifier. + - **title**: article title. + - **abstract**: article abstract. + +```{r} +text <- dbGetQuery(conn_table_CONCIERGE, "SELECT * FROM TEXT") +head(text) +``` + +Changes in articles, authors, categories or text do not modify the main dataRxiv.db. + +## 2.4 Disconnect to the database +Disconnect the database. 
+ +```{r} +dbDisconnect(conn_table_CONCIERGE) +``` diff --git a/old/iu_clean.R b/old/iu_clean.R new file mode 100644 index 0000000..0855f8c --- /dev/null +++ b/old/iu_clean.R @@ -0,0 +1,85 @@ +source(knitr::purl("exploratory/ae_load.Rmd", output=tempfile())) +rm(conn_table_CONCIERGE) +library(data.table) + +local(for (name in ls(envir=.GlobalEnv)) { + df <- as.data.table(get(name)) + df <- df[!duplicated(df)] + assign(name, df, .GlobalEnv) +}) + +articles <- merge(categories, articles) +rm(categories) +articles <- articles[repository != "arxiv" | !is.na(category)] +articles[, subcategory := subcategory_name] +articles <- articles[, c("id", "category", "subcategory", "repository", "date")] +articles[subcategory %in% c("Epidemiology", "Clinical Trials"), repository := "medrxiv"] +articles[repository == "medrxiv", category := "Health Sciences"] +articles[repository == "biorxiv", category := "Biology"] +articles[repository == "psyarxiv", category := "Psychology"] +articles[repository == "socarxiv", category := "Social Sciences"] +articles[category == "Quantitative Biology", category := "Biology"] +articles[category == "Quantitative Finance", category := "Economics"] +articles <- articles[subcategory != "Social and Behavioral Sciences"] +################################################################################ +# socarxiv adjustments +articles[repository == "socarxiv", subcategory := sub( + paste0( + "Science and Technology Studies|", + "Environmental Studies|", + "International and Area Studies|", + "Leisure Studies|", + "Organization Development|", + "Leadership Studies" + ), "Other Social and Behavioral Sciences", subcategory +)] +articles[repository == "socarxiv", subcategory := sub( + paste0( + "Library and Information Science|", + "Linguistics" + ), "Arts and Humanities", subcategory +)] +articles[repository == "socarxiv", subcategory := sub( + paste0( + "Urban Studies and Planning|", + "Social Statistics" + ), "Sociology", subcategory +)] 
+articles[repository == "socarxiv", subcategory := sub( + paste0( + "Social Work|", + "Legal Studies" + ), "Law", subcategory +)] +articles[repository == "socarxiv", subcategory := sub( + "Public Affairs, Public Policy and Public Administration", + "Political Science", subcategory +)] +articles[repository == "socarxiv", subcategory := sub( + "Agricultural and Resource Economics", + "Economics", subcategory +)] +# move stuff to psychology +psyc <- unique(articles[repository == "socarxiv" & subcategory == "Psychology"]$id) +articles[repository == "socarxiv" & id %in% psyc, category := "Psychology"] +rm(psyc) +################################################################################ +articles <- articles[subcategory != "Psychology"] +articles <- na.omit(articles) +articles <- articles[!duplicated(articles)] +################################################################################ +# restrict socarxiv and psyarxiv to main subcategories +get_main <- function(repo, data, n=100) { + data <- data[repository %in% repo] + w <- data[, .(weight=.N), by=subcategory][order(weight, decreasing=TRUE)] + data[, .(main = w$subcategory[w$subcategory %in% subcategory][1]), by=id][ + , .N, by=main][order(N, decreasing=TRUE)][N >= n]$main +} +main <- unlist(lapply(c("socarxiv", "psyarxiv"), get_main, articles)) +articles <- articles[(!repository %in% c("socarxiv", "psyarxiv")) | subcategory %in% main] +rm(main, get_main) +################################################################################ + +# checks +articles[, .(N=length(unique(subcategory))), by=category][order(category)] +articles[, .(N=length(unique(id))), by=category][order(category)] diff --git a/old/iu_clustering.R b/old/iu_clustering.R new file mode 100644 index 0000000..6382e76 --- /dev/null +++ b/old/iu_clustering.R @@ -0,0 +1,91 @@ +source("iu_clean.R") + +get_main <- function(data, del, all) { + data[!subcategory %in% del][ + , .(main = all[all %in% subcategory][1]), by=id][ + , .N, 
by=main][order(N, decreasing=TRUE)] +} + +psyarxiv <- articles[repository == "psyarxiv"] +psyarxiv.v <- psyarxiv[, .(weight=.N), by=subcategory][order(weight, decreasing=TRUE)] +psyarxiv.v + +get_main(psyarxiv, NULL, psyarxiv.v$subcategory) + +socarxiv <- articles[repository == "socarxiv"] +socarxiv.v <- socarxiv[, .(weight=.N), by=subcategory][order(weight, decreasing=TRUE)] +socarxiv.v + +get_main(socarxiv, NULL, socarxiv.v$subcategory) + +o <- socarxiv.v$subcategory +del <- c("Social and Behavioral Sciences", "Sociology", "Arts and Humanities", "Psychology", "Economics") +last <- get_main(socarxiv, character(0), o)$main +cat("Inicial :\n"); print(last); cat("\n") +for (i in seq_along(del)) { + sel <- get_main(socarxiv, del[1:i], o) + cat(del[i], ":\n") + print(setdiff(sel$main, last)); cat("\n") + last <- sel$main +} + +sum(.Last.value$N)/length(unique(socarxiv$id)) + +################################################################################ + +psyarxiv <- articles[repository == "psyarxiv"] +psyarxiv.v <- psyarxiv[, .(weight=.N), by=subcategory][order(weight, decreasing=TRUE)] +psyarxiv.v + +o <- psyarxiv.v$subcategory +del <- c("Social and Behavioral Sciences") +psyarxiv[ + !subcategory %in% del][ + , .(main = o[o %in% subcategory][1]), by=id][ + , .N, by=main][order(N, decreasing=TRUE)] +sum(.Last.value$N)/length(unique(psyarxiv$id)) + +################################################################################ + +total <- length(unique(socarxiv$id)) +keep <- socarxiv.v[c(2, 4:20)]$subcategory +100*(length(unique(socarxiv[subcategory %in% keep]$id)))/total + +plot(10:100, sapply(10:100, function(i) { + keep <- socarxiv.v[c(2, 4:i)]$subcategory + 100*(length(unique(socarxiv[subcategory %in% keep]$id)))/total +})) + +################################################################################ + +pairs <- as.data.frame(t(combn(unique(socarxiv$subcategory), 2))) +socarxiv.e <- merge(socarxiv, pairs, by.x="subcategory", by.y="V1", 
allow.cartesian=TRUE) +socarxiv.e[, keep := V2 %in% subcategory, by=id] +socarxiv.e <- socarxiv.e[keep==TRUE][, .(weight=.N), by=.(subcategory, V2)] + +library(igraph) +g <- graph_from_data_frame(socarxiv.e, directed=FALSE, socarxiv.v) +#g.gexf <- rgexf::igraph.to.gexf(g) +#writeLines(capture.output(g.gexf), "socarxiv_subcategories.gexf") +cl <- cluster_louvain(g) +sizes(cl) +sapply(communities(cl), function(i) sum(V(g)[i]$weight)) +V(g)$membership <- membership(cl) +ee <- as.data.frame(ends(g, E(g))) +ee <- cbind(ee, memV1=V(g)[ee[,1]]$membership, memV2=V(g)[ee[,2]]$membership) +g <- delete_edges(g, with(ee, which(memV2 != memV1))) + +V(g)$degree <- degree(g) +vx <- as.data.table(as_data_frame(g, "vertices")) +#View(vx[order(membership, degree, decreasing=TRUE)]) +vx.main <- vx[order(membership, degree, decreasing=TRUE), head(.SD, 1), by=membership] +g <- delete_vertices(g, vx.main[order(degree, decreasing=TRUE)]$name[1:2]) + +cl <- cluster_louvain(g) +sizes(cl) +sapply(communities(cl), function(i) sum(V(g)[i]$weight)) +V(g)$membership <- membership(cl) + +V(g)$degree <- degree(g) +vx <- as.data.table(as_data_frame(g, "vertices")) +View(vx[order(membership, degree, decreasing=TRUE)]) diff --git a/old/iu_explore.R b/old/iu_explore.R new file mode 100644 index 0000000..a1d5b7b --- /dev/null +++ b/old/iu_explore.R @@ -0,0 +1,46 @@ +for (i in unique(df.agg$subcategory)) { + p <- ggplot(df.agg[subcategory == i]) + ylim(0.4, 1) + + aes(month, r_male, color=covidpaper, size=total) + geom_point() + ggsave(paste0("img/", sub("/", "_", i), ".png"), p) +} + +ggplot(df.agg) + ylim(0.4, 1) + facet_grid(category~.) 
+ + aes(month, r_male, color=covidpaper, size=total) + geom_point(position="jitter", alpha=0.2) + +df.agg[, month_name := factor(month - year*12, labels=c("jan", "feb", "mar", "apr", "may"))] + +km <- kmeans(df.agg$r_male, 2) +df.agg[, masculinized := xor(km$cluster > 1, rep(diff(km$centers) < 0, .N))] +ggplot(df.agg) + aes(r_male, fill=masculinized) + geom_histogram() + +ggplot(df.agg) + aes(diss, r_male) + geom_point() + geom_abline() + + geom_smooth(method=lm, formula=y~splines::bs(x, 3)) +ggplot(df.agg) + aes(r_male, diss) + geom_point() + + geom_smooth(method=lm, formula=y~splines::bs(x, 3), orientation="y") + +fit_glmer <- glmer( + r_male ~ scale(month, FALSE) + covid + covidpaper + (1 | category/subcategory), + df.agg, family=binomial, weights=total) + +model_performance(fit_glmer) +model_assumptions(fit_glmer) + +summary(fit_glmer) + +sjPlot::plot_model(fit_glmer2, "est") + ylim(0.85, 1.15) +sjPlot::plot_model(fit_glmer2, "re") + +cat <- ranef(fit_glmer)$category +subcat <- ranef(fit_glmer)$subcategory +cat.names <- sapply(strsplit(rownames(cat), ":"), head, 1) +subcat.names <- sapply(strsplit(rownames(subcat), ":"), head, 1) +cat <- data.frame(category=cat.names, cat.coef=cat[[1]]) +subcat <- data.frame(subcategory=subcat.names, subcat.coef=subcat[[1]]) + +df.diss2 <- merge(merge(df.diss, subcat), cat, by="category") +ggplot(df.diss2) + aes(diss, cat.coef+subcat.coef) + geom_point() + + geom_smooth(method=lm, formula=y~log(x)) +ggplot(df.diss2) + aes(diss, exp(cat.coef+subcat.coef)) + geom_point() + + geom_smooth(method=lm) +ggplot(df.diss2) + aes(log(diss), cat.coef+subcat.coef) + geom_point() + + geom_smooth(method=lm) diff --git a/old/iu_explore.Rmd b/old/iu_explore.Rmd new file mode 100644 index 0000000..43de9ec --- /dev/null +++ b/old/iu_explore.Rmd @@ -0,0 +1,351 @@ +--- +title: "Explorando modelicos" +author: "Iñaki Úcar" +output: + html_document: + df_print: paged + number_sections: true + toc: true + toc_float: + collapsed: false + smooth_scroll: 
false +--- + +# Procesado + +## Agregado y limpieza + +```{r} +library(data.table) +library(lme4) +library(ggplot2) + +# pseudo r-squared que vale para todo +r.squared <- function(m) { + lmfit <- lm(model.response(model.frame(m)) ~ fitted(m)) + summary(lmfit)$r.squared +} + +# unas medidas performance y goodness +model_performance <- function(m) { + # obs. vs fitted + df <- data.frame(fitted=fitted(m), response=model.response(model.frame(m))) + print(ggplot(df) + aes(fitted, response) + + geom_point() + geom_abline() + geom_smooth(method="lm")) + # pseudo r-squared + cat("pseudo-R2 =", r.squared(m)) +} +model_assumptions <- function(m) { + # residuals vs fitted + df <- data.frame(fitted=fitted(m), residuals=residuals(m)) + print(ggplot(df) + aes(fitted, residuals) + + geom_point() + geom_abline(slope=0) + geom_smooth(method="lm")) + # residuals + qqnorm(residuals(m)) + qqline(residuals(m)) +} + +source("exploratory/iu_features.R") + +# quito socarxiv y psyarxiv de momento +# df <- df[!repository %in% c("socarxiv", "psyarxiv")] +# quito papers donde el número de missing sea de más del 25% de los autores +df <- df[n_na < 0.25 * (n_na + n_male + n_female)] + +# dissimilarity index (no lo uso aún) +df.diss <- df[, .( + diss = 0.5*sum(abs(n_male/sum(n_male) - n_female/sum(n_female))) +), by=.(repository, category, subcategory)] + +# agregados por subcategoría, mes y covid +df.agg <- df[, .( + r_male = sum(n_male) / sum(n_male + n_female), + n_male = sum(n_male), + n_female = sum(n_female), + total = sum(n_male) + sum(n_female) +), by=.(repository, category, subcategory, covid, year, month)] +df.agg <- merge(df.agg, df.diss) + +# quito observaciones con menos de 30 personas (en un mes) +df.agg <- df.agg[total > 30] +# covid era en realidad covidpaper +df.agg[, covidpaper := covid] +# llamamos (periodo) covid a febrero-mayo 2020 +df.agg[, covid := year == 2020 & month > 2] +# me llevo la referencia de año y mes a 0 +df.agg[, year := year - min(year)] +df.agg[, month := 
year*12 + month - 1] +``` + +## Un vistazo rápido + +En volumen, se crece un montón en número de autores: + +```{r} +ggplot(df.agg) + aes(month, total, fill=covidpaper) + geom_col() +``` + +Crecimiento por categorías: + +```{r} +ggplot(df.agg[, .(total=sum(total)), by=.(month, category)]) + + aes(month, total, color=category) + geom_line() +``` + +Esta es la distribución de la proporción de hombres, que parece bimodal (recordemos esto): + +```{r} +ggplot(df.agg) + aes(r_male, fill=covidpaper) + geom_histogram() + facet_grid(~covid) +``` + +Y parece que, _overall_, decrece la proporción de hombres: + +```{r} +ggplot(df.agg) + aes(month, r_male) + geom_boxplot(aes(group=month)) + + geom_smooth(method="lm") +``` + +Efectos de `covid` y `covidpaper` para el año 2020: + +```{r} +ggplot(df.agg[year == 3]) + aes(interaction(covid, covidpaper), r_male) + + geom_boxplot() +``` + +Efectos de `covid` y `covidpaper` para el año 2020 para las subcategorías que tienen papers COVID: + +```{r, fig.asp=4} +subcat_with_covidpaper <- unique(df.agg[covidpaper == TRUE]$subcategory) +ggplot(df.agg[subcategory %in% subcat_with_covidpaper]) + + aes(interaction(covid, covidpaper), r_male) + + geom_boxplot() + facet_grid(category~.) +``` + +# Modelos + +La idea general es modelar el porcentaje de hombres en función de: + +- Tiempo, mes o año, da un poco lo mismo (sale consistentemente lo mismo). +- Periodo de pandemia o no (variable `covid`). +- Paper sobre COVID-19 o no (variable `covidpaper`). +- Categoría o subcategoría. + +Importante: usamos el número de autores (variable `total`) como pesos. + +## Viva Gauss y la interpretabilidad + +Empezamos por lo simple. + +### lm + +Primero vamos a ver qué podemos hacer con las categorías, que es lo más grueso: + +```{r} +fit_lm <- lm( + r_male ~ month + covid + covidpaper + category, + df.agg, weights=total) +``` + +Vamos a ver qué pinta tiene: + +```{r} +model_performance(fit_lm) +``` + +Not bad, not good. 
Hay como dos clusters (¿recordáis la bimodal de arriba?, ¿puede ser eso?; en cualquier caso, está claro que las categorías no lo capturan). + +```{r} +model_assumptions(fit_lm) +``` + +Los he visto peores, pero podemos hacerlo mejor. Y finalmente: + +```{r} +summary(fit_lm) +``` + +Coge 0.68 como intercept, lo que es raro raro (la media en 2017 es más alta, de alrededor de 0.82), y los efectos de `covid` y `covidpaper` son positivos (aumentan). Lo bueno es que hay muchas estrellicas. :) + +Vamos a añadir las subcategorías: + +```{r} +fit_lm2 <- lm( + r_male ~ month + covid + covidpaper + subcategory, + df.agg, weights=total) +``` + +A ver: + +```{r} +model_performance(fit_lm2) +``` + +Muuucho mejor. Está claro que las categorías son demasiado gruesas. Y los residuales: + +```{r} +model_assumptions(fit_lm2) +``` + +No están mal, pero se pueden mejorar. Finalmente: + +```{r} +summary(fit_lm2) +``` + +El intercept está mucho mejor (0.85), y `covid` y `covidpaper` siguen saliendo positivos. + +### lmer + +Vamos directamente con las subcategorías a partir de ahora. + +```{r} +fit_lmer <- lmer( + r_male ~ month + covid + covidpaper + (1 | category/subcategory), + df.agg, weights=total) +``` + +Ojo a esto: + +```{r} +model_performance(fit_lmer) +``` + +Nice! Y los residuales: + +```{r} +model_assumptions(fit_lmer) +``` + +Pues siguen desviándose de la normal en las colas. Esto realmente es esperable, porque estamos con una proporción. + +```{r} +summary(fit_lmer) +``` + +Intecept de 0.80, que está muy bien. Variables `covid` y `covidpaper` positivas (spoiler: salen siempre positivas). Lo bueno del modelo de efectos mixtos es que le cambias subcategorías por categoría y, aunque sale peor, como el modelo lineal, el intercept es estable, sale lo mismo. + +## Binomial, un poco odd + +Dado que la variable respuesta va de 0 a 1 de forma continua, lo suyo es un modelo binomial fraccional (binomial más los pesos de los _counts_). 
+ +### glm + +Primero lo primero: + +```{r} +fit_glm <- glm( + r_male ~ month + covid + covidpaper + subcategory, + df.agg, family=binomial, weights=total) +``` + +Que nos da: + +```{r} +model_performance(fit_glm) +``` + +Muy bien, similar a lo anterior, pero ahora: + +```{r} +model_assumptions(fit_glm) +``` + +Espectaculares residuales. No habéis visto unos residuales así ni en los ejercicios de clase. Finalmente, el chorizo: + +```{r} +summary(fit_glm) +``` + +Ojo que ahora esto son odds ratio! Podemos transformar el intercept a la unidad original: + +```{r} +exp(coef(fit_glm)[1]) / (exp(coef(fit_glm)[1]) + 1) +``` + +No está mal. Quizás un poco alto. Y los efectos: + +```{r, fig.asp=4} +sjPlot::plot_model(fit_glm) +``` + +### glmer + +Y ya todo junto: binomial fraccional de efectos mixtos. Hay que escalar el mes o si no se queja (también se puede cambiar por el año, no hay mucha diferencia): + +```{r} +fit_glmer <- glmer( + r_male ~ scale(month, FALSE) + covid + covidpaper + (1 | category/subcategory), + df.agg, family=binomial, weights=total) +``` + +Tenemos: + +```{r} +model_performance(fit_glmer) +``` + +Similar, y: + +```{r} +model_assumptions(fit_glmer) +``` + +Similar. Finalmente: + +```{r} +summary(fit_glmer) +``` + +Esta es la estimación del intercept más ajustada hasta ahora: + +```{r} +exp(fixef(fit_glmer)[1]) / (exp(fixef(fit_glmer)[1]) + 1) +``` + +Nice! Efectos fijos: + +```{r} +sjPlot::plot_model(fit_glmer) + ylim(0.7, 1.3) +``` + +Y siguen saliendo positivos. Lo que me escama es que se ve una bajada en las gráficas de arriba. La media de la proporción de hombres para 2020 es más baja, pero las variables COVID salen siempre positivas (!). ¿Por qué? + +Efectos aleatorios: + +```{r} +sjPlot::plot_model(fit_glmer, "re")[[2]] +``` + +```{r, fig.asp=4} +sjPlot::plot_model(fit_glmer, "re")[[1]] +``` + +## Postre + +¿Qué pasa si usamos categorías (demasiado gruesas) más otra variable predictiva (la que nos hemos dejado todo este rato)? 
+ +```{r} +fit_glmer2 <- glmer( + r_male ~ scale(month, FALSE) + covid + covidpaper + diss + (1 | category), + df.agg, family=binomial, weights=total) +``` + +Esto es interesante: + +```{r} +model_performance(fit_glmer2) +``` + +Siguen saliendo dos modos, dos clusters, pero se explica bastante más variabilidad. + +```{r} +model_assumptions(fit_glmer2) +``` + +Los residuales siguen siendo fetén, porque el problema es el tipo de modelo (binomial fraccional vs. gaussiano). ¿Adivináis qué pasa con los coeficientes COVID? + +```{r} +summary(fit_glmer2) +``` + +Siguen saliendo positivos. Lo que pasa ahora es que el índice de disimilaridad se ha llevado medio intercept (!). diff --git a/old/iu_explore.html b/old/iu_explore.html new file mode 100644 index 0000000..0a62108 --- /dev/null +++ b/old/iu_explore.html @@ -0,0 +1,5446 @@ + + + + + + + + + + + + + + +Explorando modelicos + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+
+
+
+ +
+ + + + + + + +
+

1 Procesado

+
+

1.1 Agregado y limpieza

+
library(data.table)
+library(lme4)
+
## Loading required package: Matrix
+
library(ggplot2)
+
+# pseudo R-squared usable with any fitted model m: R-squared of lm(observed response ~ fitted values)
+r.squared <- function(m) {
+  lmfit <-  lm(model.response(model.frame(m)) ~ fitted(m))  # regress the response of m on its fitted values
+  summary(lmfit)$r.squared  # returned value: R-squared of that auxiliary regression
+}
+
+# some performance and goodness-of-fit measures for a fitted model m
+model_performance <- function(m) {
+  # observed response vs. fitted values, with a linear trend line
+  df <- data.frame(fitted=fitted(m), response=model.response(model.frame(m)))
+  print(ggplot(df) + aes(fitted, response) + geom_point() + geom_smooth(method="lm"))
+  # print the pseudo R-squared computed by r.squared()
+  cat("pseudo-R2 =", r.squared(m))
+}
+model_assumptions <- function(m) {  # residual diagnostics for a fitted model m
+  # residuals vs. fitted values, with a linear trend line
+  df <- data.frame(fitted=fitted(m), residuals=residuals(m))
+  print(ggplot(df) + aes(fitted, residuals) + geom_point() + geom_smooth(method="lm"))
+  # normal Q-Q plot of the residuals with its reference line
+  qqnorm(residuals(m))
+  qqline(residuals(m))
+}
+
+source("exploratory/iu_features.R")  # presumably builds the df used below (not shown here)
+
+# drop socarxiv and psyarxiv for now
+df <- df[!repository %in% c("socarxiv", "psyarxiv")]
+# keep only papers where authors with missing gender (n_na) are less than 25% of all authors
+df <- df[n_na < 0.25 * (n_na + n_male + n_female)]
+
+# dissimilarity index per repository/category/subcategory (not used yet)
+df.diss <- df[, .(
+  diss = 0.5*sum(abs(n_male/sum(n_male) - n_female/sum(n_female)))
+), by=.(repository, category, subcategory)]
+
+# aggregates by repository, category, subcategory, covid, year and month
+df.agg <- df[, .(
+  r_male = sum(n_male) / sum(n_male + n_female),
+  n_male = sum(n_male),
+  n_female = sum(n_female),
+  total = sum(n_male) + sum(n_female)
+), by=.(repository, category, subcategory, covid, year, month)]
+df.agg <- merge(df.agg, df.diss)
+
+# keep only observations with total > 30 (male + female authors in a month)
+df.agg <- df.agg[total > 30]
+# covid was actually the covidpaper flag: copy it under that name
+df.agg[, covidpaper := covid]
+# covid now flags the COVID period: 2020 and month > 2. NOTE(review): the original note said February-May 2020, but with 1-based months this starts in March (confirm)
+df.agg[, covid := year == 2020 & month > 2]
+# shift the year and month reference to 0 (month becomes a running month index)
+df.agg[, year := year - min(year)]
+df.agg[, month := year*12 + month - 1]
+
+
+

1.2 Un vistazo rápido

+

En volumen, se crece un montón en número de autores:

+
ggplot(df.agg) + aes(month, total) + geom_col()
+

+

Esta es la distribución de la proporción de hombres, que parece bimodal (recordemos esto):

+
ggplot(df.agg) + aes(r_male, fill=covid) + geom_histogram()
+
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
+

+

Y parece que, overall, decrece la proporción de hombres:

+
ggplot(df.agg) + aes(month, r_male) + geom_boxplot(aes(group=month)) +
+  geom_smooth(method="lm")
+
## `geom_smooth()` using formula 'y ~ x'
+

+

Efectos de covid y covidpaper para el año 2020:

+
ggplot(df.agg[year == 3]) + aes(interaction(covid, covidpaper), r_male) +
+  geom_boxplot()
+

+

Efectos de covid y covidpaper para el año 2020 para las subcategorías que tienen papers COVID:

+
subcat_with_covidpaper <- unique(df.agg[covidpaper == TRUE]$subcategory)
+ggplot(df.agg[subcategory %in% subcat_with_covidpaper]) +
+  aes(interaction(covid, covidpaper), r_male) +
+  geom_boxplot() + facet_grid(category~.)
+

+
+
+
+

2 Modelos

+

La idea general es modelar el porcentaje de hombres en función de:

+
    +
  • Tiempo, mes o año, da un poco lo mismo (sale consistentemente lo mismo).
  • +
  • Periodo de pandemia o no (variable covid).
  • +
  • Paper sobre COVID-19 o no (variable covidpaper).
  • +
  • Categoría o subcategoría.
  • +
+

Importante: usamos el número de autores (variable total) como pesos.

+
+

2.1 Viva Gauss y la interpretabilidad

+

Empezamos por lo simple.

+
+

2.1.1 lm

+

Primero vamos a ver qué podemos hacer con las categorías, que es lo más grueso:

+
fit_lm <- lm(
+  r_male ~ month + covid + covidpaper + category,
+  df.agg, weights=total)
+

Vamos a ver qué pinta tiene:

+
model_performance(fit_lm)
+
## `geom_smooth()` using formula 'y ~ x'
+

+
## pseudo-R2 = 0.6152669
+

Not bad, not good. Hay como dos clusters (¿recordáis la bimodal de arriba?, ¿puede ser eso?; en cualquier caso, está claro que las categorías no lo capturan).

+
model_assumptions(fit_lm)
+
## `geom_smooth()` using formula 'y ~ x'
+

+

Los he visto peores, pero podemos hacerlo mejor. Y finalmente:

+
summary(fit_lm)
+
## 
+## Call:
+## lm(formula = r_male ~ month + covid + covidpaper + category, 
+##     data = df.agg, weights = total)
+## 
+## Weighted Residuals:
+##     Min      1Q  Median      3Q     Max 
+## -5.7083 -0.4090  0.1254  0.5840  3.4994 
+## 
+## Coefficients:
+##                                                      Estimate Std. Error
+## (Intercept)                                         6.814e-01  2.287e-03
+## month                                              -5.815e-04  6.858e-05
+## covidTRUE                                           3.978e-03  1.979e-03
+## covidpaperTRUE                                      1.306e-02  5.476e-03
+## categoryComputer Science                            1.639e-01  2.135e-03
+## categoryEconomics                                   1.883e-01  1.070e-02
+## categoryElectrical Engineering and Systems Science  1.603e-01  4.548e-03
+## categoryHealth Sciences                            -2.569e-02  5.168e-03
+## categoryMathematics                                 1.769e-01  2.524e-03
+## categoryPhysics                                     1.650e-01  2.006e-03
+## categoryStatistics                                  1.628e-01  3.486e-03
+##                                                    t value Pr(>|t|)    
+## (Intercept)                                        297.891   <2e-16 ***
+## month                                               -8.478   <2e-16 ***
+## covidTRUE                                            2.010   0.0445 *  
+## covidpaperTRUE                                       2.386   0.0171 *  
+## categoryComputer Science                            76.750   <2e-16 ***
+## categoryEconomics                                   17.602   <2e-16 ***
+## categoryElectrical Engineering and Systems Science  35.237   <2e-16 ***
+## categoryHealth Sciences                             -4.971    7e-07 ***
+## categoryMathematics                                 70.088   <2e-16 ***
+## categoryPhysics                                     82.231   <2e-16 ***
+## categoryStatistics                                  46.711   <2e-16 ***
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+## 
+## Residual standard error: 0.8612 on 3462 degrees of freedom
+## Multiple R-squared:  0.7506, Adjusted R-squared:  0.7499 
+## F-statistic:  1042 on 10 and 3462 DF,  p-value: < 2.2e-16
+

Coge 0.68 como intercept, lo que es raro raro (la media en 2017 es más alta, de alrededor de 0.82), y los efectos de covid y covidpaper son positivos (aumentan). Lo bueno es que hay muchas estrellicas. :)

+

Vamos a añadir las subcategorías:

+
fit_lm2 <- lm(
+  r_male ~ month + covid + covidpaper + subcategory,
+  df.agg, weights=total)
+

A ver:

+
model_performance(fit_lm2)
+
## `geom_smooth()` using formula 'y ~ x'
+

+
## pseudo-R2 = 0.8002547
+

Muuucho mejor. Está claro que las categorías son demasiado gruesas. Y los residuales:

+
model_assumptions(fit_lm2)
+
## `geom_smooth()` using formula 'y ~ x'
+

+

No están mal, pero se pueden mejorar. Finalmente:

+
summary(fit_lm2)
+
## 
+## Call:
+## lm(formula = r_male ~ month + covid + covidpaper + subcategory, 
+##     data = df.agg, weights = total)
+## 
+## Weighted Residuals:
+##      Min       1Q   Median       3Q      Max 
+## -2.21653 -0.31031  0.02346  0.33057  2.89103 
+## 
+## Coefficients:
+##                                                                                Estimate
+## (Intercept)                                                                   8.586e-01
+## month                                                                        -5.251e-04
+## covidTRUE                                                                     2.792e-03
+## covidpaperTRUE                                                                1.302e-02
+## subcategoryAdaptation and Self-Organizing Systems                            -1.640e-02
+## subcategoryAddiction Medicine                                                -2.319e-01
+## subcategoryAlgebraic Geometry                                                 1.080e-02
+## subcategoryAlgebraic Topology                                                 9.395e-03
+## subcategoryAllergy and Immunology                                            -2.548e-01
+## subcategoryAnalysis of PDEs                                                  -1.809e-02
+## subcategoryAnimal Behavior and Cognition                                     -1.844e-01
+## subcategoryApplications                                                      -7.477e-02
+## subcategoryApplied Physics                                                   -1.995e-02
+## subcategoryArtificial Intelligence                                           -1.475e-02
+## subcategoryAstrophysics of Galaxies                                          -9.107e-02
+## subcategoryAtmospheric and Oceanic Physics                                   -7.857e-03
+## subcategoryAtomic and Molecular Clusters                                     -2.281e-02
+## subcategoryAtomic Physics                                                     1.778e-02
+## subcategoryAudio and Speech Processing                                       -1.833e-02
+## subcategoryBiochemistry                                                      -1.664e-01
+## subcategoryBioengineering                                                    -1.341e-01
+## subcategoryBioinformatics                                                    -1.131e-01
+## subcategoryBiological Physics                                                -4.417e-02
+## subcategoryBiomolecules                                                      -7.660e-02
+## subcategoryBiophysics                                                        -1.050e-01
+## subcategoryCancer Biology                                                    -2.140e-01
+## subcategoryCardiovascular Medicine                                           -1.062e-01
+## subcategoryCategory Theory                                                    5.785e-03
+## subcategoryCell Behavior                                                     -1.010e-01
+## subcategoryCell Biology                                                      -2.182e-01
+## subcategoryChaotic Dynamics                                                   1.514e-02
+## subcategoryChemical Physics                                                  -1.081e-02
+## subcategoryClassical Analysis and ODEs                                       -4.367e-03
+## subcategoryClassical Physics                                                  2.060e-02
+## subcategoryClinical Trials                                                   -2.474e-01
+## subcategoryCombinatorics                                                     -2.698e-02
+## subcategoryCommutative Algebra                                               -2.599e-02
+## subcategoryComplex Variables                                                  2.402e-02
+## subcategoryComputation                                                        1.172e-02
+## subcategoryComputation and Language                                          -5.599e-02
+## subcategoryComputational Complexity                                           3.698e-02
+## subcategoryComputational Engineering, Finance, and Science                    1.179e-02
+## subcategoryComputational Finance                                              3.900e-02
+## subcategoryComputational Geometry                                             1.996e-02
+## subcategoryComputational Physics                                              2.515e-02
+## subcategoryComputer Science and Game Theory                                  -9.428e-05
+## subcategoryComputer Vision and Pattern Recognition                           -2.722e-02
+## subcategoryComputers and Society                                             -7.996e-02
+## subcategoryCosmology and Nongalactic Astrophysics                            -3.277e-02
+## subcategoryCryptography and Security                                         -8.358e-03
+## subcategoryData Analysis, Statistics and Probability                          5.217e-03
+## subcategoryData Structures and Algorithms                                     2.054e-02
+## subcategoryDatabases                                                         -2.523e-02
+## subcategoryDermatology                                                       -1.574e-01
+## subcategoryDevelopmental Biology                                             -2.402e-01
+## subcategoryDifferential Geometry                                              1.462e-02
+## subcategoryDigital Libraries                                                 -6.944e-02
+## subcategoryDiscrete Mathematics                                               1.395e-02
+## subcategoryDisordered Systems and Neural Networks                             1.033e-02
+## subcategoryDistributed, Parallel, and Cluster Computing                       6.835e-03
+## subcategoryDynamical Systems                                                 -1.242e-02
+## subcategoryEarth and Planetary Astrophysics                                  -6.191e-02
+## subcategoryEcology                                                           -1.650e-01
+## subcategoryEconometrics                                                       3.233e-02
+## subcategoryEconomics                                                         -5.223e-03
+## subcategoryEmergency Medicine                                                -1.332e-01
+## subcategoryEmerging Technologies                                              1.923e-02
+## subcategoryEndocrinology (including Diabetes Mellitus and Metabolic Disease) -2.314e-01
+## subcategoryEpidemiology                                                      -1.892e-01
+## subcategoryEvolutionary Biology                                              -1.578e-01
+## subcategoryExactly Solvable and Integrable Systems                           -3.029e-03
+## subcategoryFluid Dynamics                                                     2.755e-02
+## subcategoryFormal Languages and Automata Theory                               2.357e-03
+## subcategoryFunctional Analysis                                                6.153e-03
+## subcategoryGastroenterology                                                  -1.989e-01
+## subcategoryGeneral Economics                                                 -1.422e-02
+## subcategoryGeneral Finance                                                    1.606e-02
+## subcategoryGeneral Literature                                                 4.336e-02
+## subcategoryGeneral Mathematics                                                2.371e-02
+## subcategoryGeneral Physics                                                    3.249e-02
+## subcategoryGeneral Relativity and Quantum Cosmology                           1.106e-02
+## subcategoryGeneral Topology                                                   1.676e-02
+## subcategoryGenetic and Genomic Medicine                                      -2.634e-01
+## subcategoryGenetics                                                          -2.130e-01
+## subcategoryGenomics                                                          -1.788e-01
+## subcategoryGeometric Topology                                                -2.478e-02
+## subcategoryGeophysics                                                        -8.899e-03
+## subcategoryGeriatric Medicine                                                -4.088e-02
+## subcategoryGraphics                                                           6.636e-03
+## subcategoryGroup Theory                                                       5.034e-03
+## subcategoryHardware Architecture                                              2.288e-02
+## subcategoryHealth Economics                                                  -4.185e-02
+## subcategoryHealth Informatics                                                -1.369e-01
+## subcategoryHealth Policy                                                     -1.777e-01
+## subcategoryHealth Systems and Quality Improvement                            -2.068e-01
+## subcategoryHematology                                                        -1.351e-01
+## subcategoryHigh Energy Astrophysical Phenomena                               -4.120e-02
+## subcategoryHigh Energy Physics - Experiment                                  -2.383e-02
+## subcategoryHigh Energy Physics - Lattice                                      5.297e-02
+## subcategoryHigh Energy Physics - Phenomenology                               -4.117e-04
+## subcategoryHigh Energy Physics - Theory                                       3.808e-02
+## subcategoryHistory and Overview                                              -2.321e-02
+## subcategoryHistory and Philosophy of Physics                                  3.613e-02
+## subcategoryHIV/AIDS                                                          -2.480e-01
+## subcategoryHuman-Computer Interaction                                        -7.984e-02
+## subcategoryImage and Video Processing                                        -3.274e-02
+## subcategoryImmunology                                                        -2.366e-01
+## subcategoryInfectious Diseases (except HIV/AIDS)                             -2.123e-01
+## subcategoryInformation Retrieval                                             -5.069e-02
+## subcategoryInformation Theory                                                -2.084e-02
+## subcategoryInstrumentation and Detectors                                     -4.746e-03
+## subcategoryInstrumentation and Methods for Astrophysics                      -3.700e-02
+## subcategoryIntensive Care and Critical Care Medicine                         -1.193e-01
+## subcategoryK-Theory and Homology                                              4.583e-02
+## subcategoryLogic                                                              4.804e-02
+## subcategoryLogic in Computer Science                                          2.615e-02
+## subcategoryMachine Learning                                                  -3.734e-03
+## subcategoryMaterials Science                                                 -2.492e-02
+## subcategoryMathematical Finance                                              -1.429e-02
+## subcategoryMathematical Physics                                               4.016e-02
+## subcategoryMathematical Software                                              3.915e-02
+## subcategoryMedical Education                                                 -8.481e-02
+## subcategoryMedical Physics                                                   -5.418e-02
+## subcategoryMesoscale and Nanoscale Physics                                    5.368e-03
+## subcategoryMethodology                                                       -4.983e-02
+## subcategoryMetric Geometry                                                    2.543e-02
+## subcategoryMicrobiology                                                      -2.203e-01
+## subcategoryMolecular Biology                                                 -2.041e-01
+## subcategoryMolecular Networks                                                -7.379e-02
+## subcategoryMultiagent Systems                                                 2.182e-03
+## subcategoryMultimedia                                                        -4.221e-02
+## subcategoryNephrology                                                        -2.355e-01
+## subcategoryNetworking and Internet Architecture                              -6.775e-03
+## subcategoryNeural and Evolutionary Computing                                  1.240e-02
+## subcategoryNeurology                                                         -1.860e-01
+## subcategoryNeurons and Cognition                                             -6.842e-02
+## subcategoryNeuroscience                                                      -1.793e-01
+## subcategoryNuclear Experiment                                                -2.607e-02
+## subcategoryNuclear Theory                                                    -4.704e-03
+## subcategoryNumber Theory                                                      1.850e-02
+## subcategoryNumerical Analysis                                                -3.937e-03
+## subcategoryNutrition                                                         -2.864e-01
+## subcategoryObstetrics and Gynecology                                         -4.172e-01
+## subcategoryOccupational and Environmental Health                             -2.193e-01
+## subcategoryOncology                                                          -2.191e-01
+## subcategoryOperating Systems                                                  4.299e-02
+## subcategoryOperator Algebras                                                  2.816e-02
+## subcategoryOphthalmology                                                     -2.110e-01
+## subcategoryOptics                                                             1.396e-03
+## subcategoryOptimization and Control                                           2.768e-03
+## subcategoryOther Computer Science                                            -2.097e-02
+## subcategoryOther Condensed Matter                                             2.083e-02
+## subcategoryOther Quantitative Biology                                        -9.560e-02
+## subcategoryOther Statistics                                                  -7.270e-02
+## subcategoryOtolaryngology                                                    -7.187e-02
+## subcategoryPaleontology                                                      -4.427e-02
+## subcategoryPathology                                                         -2.275e-01
+## subcategoryPattern Formation and Solitons                                     1.064e-02
+## subcategoryPediatrics                                                        -3.002e-01
+## subcategoryPerformance                                                        1.320e-02
+## subcategoryPharmacology and Therapeutics                                     -1.899e-01
+## subcategoryPharmacology and Toxicology                                       -1.975e-01
+## subcategoryPhysics and Society                                               -2.547e-02
+## subcategoryPhysics Education                                                 -1.397e-01
+## subcategoryPhysiology                                                        -2.161e-01
+## subcategoryPlant Biology                                                     -1.936e-01
+## subcategoryPlasma Physics                                                     2.798e-02
+## subcategoryPopular Physics                                                   -5.809e-02
+## subcategoryPopulations and Evolution                                         -7.796e-02
+## subcategoryPortfolio Management                                              -3.625e-02
+## subcategoryPricing of Securities                                              8.302e-02
+## subcategoryPrimary Care Research                                             -2.276e-01
+## subcategoryProbability                                                        1.016e-02
+## subcategoryProgramming Languages                                              3.357e-02
+## subcategoryPsychiatry and Clinical Psychology                                -2.434e-01
+## subcategoryPublic and Global Health                                          -2.302e-01
+## subcategoryQuantitative Methods                                              -8.585e-02
+## subcategoryQuantum Algebra                                                    1.857e-03
+## subcategoryQuantum Gases                                                      2.231e-02
+## subcategoryQuantum Physics                                                    1.482e-02
+## subcategoryRadiology and Imaging                                             -1.573e-01
+## subcategoryRehabilitation Medicine and Physical Therapy                      -2.793e-01
+## subcategoryRepresentation Theory                                             -1.796e-02
+## subcategoryRespiratory Medicine                                              -1.760e-01
+## subcategoryRheumatology                                                      -2.791e-01
+## subcategoryRings and Algebras                                                -3.891e-02
+## subcategoryRisk Management                                                   -9.479e-03
+## subcategoryRobotics                                                           1.247e-02
+## subcategoryScientific Communication and Education                            -2.784e-01
+## subcategorySexual and Reproductive Health                                    -4.340e-01
+## subcategorySignal Processing                                                 -1.957e-02
+## subcategorySocial and Information Networks                                   -4.321e-02
+## subcategorySoft Condensed Matter                                             -1.896e-02
+## subcategorySoftware Engineering                                              -2.854e-03
+## subcategorySolar and Stellar Astrophysics                                    -7.034e-02
+## subcategorySound                                                             -1.348e-02
+## subcategorySpace Physics                                                     -2.448e-02
+## subcategorySpectral Theory                                                    6.502e-03
+## subcategorySports Medicine                                                   -6.391e-02
+## subcategoryStatistical Finance                                                3.257e-03
+## subcategoryStatistical Mechanics                                              3.434e-02
+## subcategoryStatistics Theory                                                 -2.578e-02
+## subcategoryStrongly Correlated Electrons                                      1.497e-02
+## subcategorySubcellular Processes                                             -7.617e-02
+## subcategorySuperconductivity                                                 -8.860e-04
+## subcategorySurgery                                                           -1.248e-01
+## subcategorySymbolic Computation                                               2.322e-02
+## subcategorySymplectic Geometry                                                1.018e-02
+## subcategorySynthetic Biology                                                 -1.429e-01
+## subcategorySystems and Control                                                1.370e-02
+## subcategorySystems Biology                                                   -1.297e-01
+## subcategoryTheoretical Economics                                              5.131e-02
+## subcategoryTissues and Organs                                                -1.312e-01
+## subcategoryTrading and Market Microstructure                                  3.232e-02
+## subcategoryZoology                                                           -1.661e-01
+##                                                                              Std. Error
+## (Intercept)                                                                   1.075e-02
+## month                                                                         3.958e-05
+## covidTRUE                                                                     1.138e-03
+## covidpaperTRUE                                                                3.445e-03
+## subcategoryAdaptation and Self-Organizing Systems                             1.582e-02
+## subcategoryAddiction Medicine                                                 5.841e-02
+## subcategoryAlgebraic Geometry                                                 1.184e-02
+## subcategoryAlgebraic Topology                                                 1.370e-02
+## subcategoryAllergy and Immunology                                             3.120e-02
+## subcategoryAnalysis of PDEs                                                   1.141e-02
+## subcategoryAnimal Behavior and Cognition                                      1.470e-02
+## subcategoryApplications                                                       1.204e-02
+## subcategoryApplied Physics                                                    1.141e-02
+## subcategoryArtificial Intelligence                                            1.117e-02
+## subcategoryAstrophysics of Galaxies                                           1.099e-02
+## subcategoryAtmospheric and Oceanic Physics                                    1.531e-02
+## subcategoryAtomic and Molecular Clusters                                      1.973e-02
+## subcategoryAtomic Physics                                                     1.224e-02
+## subcategoryAudio and Speech Processing                                        1.242e-02
+## subcategoryBiochemistry                                                       1.183e-02
+## subcategoryBioengineering                                                     1.258e-02
+## subcategoryBioinformatics                                                     1.131e-02
+## subcategoryBiological Physics                                                 1.258e-02
+## subcategoryBiomolecules                                                       1.645e-02
+## subcategoryBiophysics                                                         1.201e-02
+## subcategoryCancer Biology                                                     1.151e-02
+## subcategoryCardiovascular Medicine                                            1.964e-02
+## subcategoryCategory Theory                                                    1.593e-02
+## subcategoryCell Behavior                                                      1.928e-02
+## subcategoryCell Biology                                                       1.142e-02
+## subcategoryChaotic Dynamics                                                   1.488e-02
+## subcategoryChemical Physics                                                   1.192e-02
+## subcategoryClassical Analysis and ODEs                                        1.300e-02
+## subcategoryClassical Physics                                                  1.558e-02
+## subcategoryClinical Trials                                                    3.046e-02
+## subcategoryCombinatorics                                                      1.139e-02
+## subcategoryCommutative Algebra                                                1.446e-02
+## subcategoryComplex Variables                                                  1.380e-02
+## subcategoryComputation                                                        1.377e-02
+## subcategoryComputation and Language                                           1.116e-02
+## subcategoryComputational Complexity                                           1.383e-02
+## subcategoryComputational Engineering, Finance, and Science                    1.481e-02
+## subcategoryComputational Finance                                              2.633e-02
+## subcategoryComputational Geometry                                             1.443e-02
+## subcategoryComputational Physics                                              1.182e-02
+## subcategoryComputer Science and Game Theory                                   1.338e-02
+## subcategoryComputer Vision and Pattern Recognition                            1.090e-02
+## subcategoryComputers and Society                                              1.199e-02
+## subcategoryCosmology and Nongalactic Astrophysics                             1.119e-02
+## subcategoryCryptography and Security                                          1.149e-02
+## subcategoryData Analysis, Statistics and Probability                          1.391e-02
+## subcategoryData Structures and Algorithms                                     1.189e-02
+## subcategoryDatabases                                                          1.306e-02
+## subcategoryDermatology                                                        7.788e-02
+## subcategoryDevelopmental Biology                                              1.214e-02
+## subcategoryDifferential Geometry                                              1.210e-02
+## subcategoryDigital Libraries                                                  1.574e-02
+## subcategoryDiscrete Mathematics                                               1.322e-02
+## subcategoryDisordered Systems and Neural Networks                             1.306e-02
+## subcategoryDistributed, Parallel, and Cluster Computing                       1.169e-02
+## subcategoryDynamical Systems                                                  1.193e-02
+## subcategoryEarth and Planetary Astrophysics                                   1.128e-02
+## subcategoryEcology                                                            1.194e-02
+## subcategoryEconometrics                                                       1.802e-02
+## subcategoryEconomics                                                          1.850e-02
+## subcategoryEmergency Medicine                                                 3.659e-02
+## subcategoryEmerging Technologies                                              1.476e-02
+## subcategoryEndocrinology (including Diabetes Mellitus and Metabolic Disease)  3.213e-02
+## subcategoryEpidemiology                                                       1.170e-02
+## subcategoryEvolutionary Biology                                               1.181e-02
+## subcategoryExactly Solvable and Integrable Systems                            1.701e-02
+## subcategoryFluid Dynamics                                                     1.198e-02
+## subcategoryFormal Languages and Automata Theory                               1.616e-02
+## subcategoryFunctional Analysis                                                1.233e-02
+## subcategoryGastroenterology                                                   2.499e-02
+## subcategoryGeneral Economics                                                  1.943e-02
+## subcategoryGeneral Finance                                                    2.449e-02
+## subcategoryGeneral Literature                                                 7.608e-02
+## subcategoryGeneral Mathematics                                                3.528e-02
+## subcategoryGeneral Physics                                                    2.039e-02
+## subcategoryGeneral Relativity and Quantum Cosmology                           1.128e-02
+## subcategoryGeneral Topology                                                   1.976e-02
+## subcategoryGenetic and Genomic Medicine                                       1.487e-02
+## subcategoryGenetics                                                           1.131e-02
+## subcategoryGenomics                                                           1.119e-02
+## subcategoryGeometric Topology                                                 1.320e-02
+## subcategoryGeophysics                                                         1.450e-02
+## subcategoryGeriatric Medicine                                                 7.217e-02
+## subcategoryGraphics                                                           1.465e-02
+## subcategoryGroup Theory                                                       1.310e-02
+## subcategoryHardware Architecture                                              1.593e-02
+## subcategoryHealth Economics                                                   4.790e-02
+## subcategoryHealth Informatics                                                 1.893e-02
+## subcategoryHealth Policy                                                      3.059e-02
+## subcategoryHealth Systems and Quality Improvement                             2.901e-02
+## subcategoryHematology                                                         5.882e-02
+## subcategoryHigh Energy Astrophysical Phenomena                                1.121e-02
+## subcategoryHigh Energy Physics - Experiment                                   1.167e-02
+## subcategoryHigh Energy Physics - Lattice                                      1.303e-02
+## subcategoryHigh Energy Physics - Phenomenology                                1.111e-02
+## subcategoryHigh Energy Physics - Theory                                       1.119e-02
+## subcategoryHistory and Overview                                               2.467e-02
+## subcategoryHistory and Philosophy of Physics                                  2.276e-02
+## subcategoryHIV/AIDS                                                           3.750e-02
+## subcategoryHuman-Computer Interaction                                         1.219e-02
+## subcategoryImage and Video Processing                                         1.156e-02
+## subcategoryImmunology                                                         1.165e-02
+## subcategoryInfectious Diseases (except HIV/AIDS)                              1.187e-02
+## subcategoryInformation Retrieval                                              1.216e-02
+## subcategoryInformation Theory                                                 1.102e-02
+## subcategoryInstrumentation and Detectors                                      1.174e-02
+## subcategoryInstrumentation and Methods for Astrophysics                       1.133e-02
+## subcategoryIntensive Care and Critical Care Medicine                          2.011e-02
+## subcategoryK-Theory and Homology                                              1.894e-02
+## subcategoryLogic                                                              1.418e-02
+## subcategoryLogic in Computer Science                                          1.287e-02
+## subcategoryMachine Learning                                                   1.081e-02
+## subcategoryMaterials Science                                                  1.101e-02
+## subcategoryMathematical Finance                                               2.010e-02
+## subcategoryMathematical Physics                                               1.114e-02
+## subcategoryMathematical Software                                              2.087e-02
+## subcategoryMedical Education                                                  7.441e-02
+## subcategoryMedical Physics                                                    1.334e-02
+## subcategoryMesoscale and Nanoscale Physics                                    1.105e-02
+## subcategoryMethodology                                                        1.188e-02
+## subcategoryMetric Geometry                                                    1.466e-02
+## subcategoryMicrobiology                                                       1.115e-02
+## subcategoryMolecular Biology                                                  1.183e-02
+## subcategoryMolecular Networks                                                 1.783e-02
+## subcategoryMultiagent Systems                                                 1.385e-02
+## subcategoryMultimedia                                                         1.434e-02
+## subcategoryNephrology                                                         4.593e-02
+## subcategoryNetworking and Internet Architecture                               1.170e-02
+## subcategoryNeural and Evolutionary Computing                                  1.206e-02
+## subcategoryNeurology                                                          1.629e-02
+## subcategoryNeurons and Cognition                                              1.292e-02
+## subcategoryNeuroscience                                                       1.098e-02
+## subcategoryNuclear Experiment                                                 1.275e-02
+## subcategoryNuclear Theory                                                     1.197e-02
+## subcategoryNumber Theory                                                      1.201e-02
+## subcategoryNumerical Analysis                                                 1.132e-02
+## subcategoryNutrition                                                          5.527e-02
+## subcategoryObstetrics and Gynecology                                          5.959e-02
+## subcategoryOccupational and Environmental Health                              2.720e-02
+## subcategoryOncology                                                           1.640e-02
+## subcategoryOperating Systems                                                  3.679e-02
+## subcategoryOperator Algebras                                                  1.507e-02
+## subcategoryOphthalmology                                                      3.970e-02
+## subcategoryOptics                                                             1.121e-02
+## subcategoryOptimization and Control                                           1.134e-02
+## subcategoryOther Computer Science                                             2.620e-02
+## subcategoryOther Condensed Matter                                             1.514e-02
+## subcategoryOther Quantitative Biology                                         4.812e-02
+## subcategoryOther Statistics                                                   3.016e-02
+## subcategoryOtolaryngology                                                     5.628e-02
+## subcategoryPaleontology                                                       7.981e-02
+## subcategoryPathology                                                          1.510e-02
+## subcategoryPattern Formation and Solitons                                     1.554e-02
+## subcategoryPediatrics                                                         3.346e-02
+## subcategoryPerformance                                                        1.522e-02
+## subcategoryPharmacology and Therapeutics                                      3.134e-02
+## subcategoryPharmacology and Toxicology                                        1.422e-02
+## subcategoryPhysics and Society                                                1.216e-02
+## subcategoryPhysics Education                                                  1.947e-02
+## subcategoryPhysiology                                                         1.357e-02
+## subcategoryPlant Biology                                                      1.205e-02
+## subcategoryPlasma Physics                                                     1.312e-02
+## subcategoryPopular Physics                                                    3.777e-02
+## subcategoryPopulations and Evolution                                          1.298e-02
+## subcategoryPortfolio Management                                               4.872e-02
+## subcategoryPricing of Securities                                              4.295e-02
+## subcategoryPrimary Care Research                                              5.768e-02
+## subcategoryProbability                                                        1.154e-02
+## subcategoryProgramming Languages                                              1.392e-02
+## subcategoryPsychiatry and Clinical Psychology                                 1.668e-02
+## subcategoryPublic and Global Health                                           1.428e-02
+## subcategoryQuantitative Methods                                               1.266e-02
+## subcategoryQuantum Algebra                                                    1.447e-02
+## subcategoryQuantum Gases                                                      1.226e-02
+## subcategoryQuantum Physics                                                    1.100e-02
+## subcategoryRadiology and Imaging                                              2.023e-02
+## subcategoryRehabilitation Medicine and Physical Therapy                       3.235e-02
+## subcategoryRepresentation Theory                                              1.269e-02
+## subcategoryRespiratory Medicine                                               2.096e-02
+## subcategoryRheumatology                                                       3.130e-02
+## subcategoryRings and Algebras                                                 1.365e-02
+## subcategoryRisk Management                                                    3.012e-02
+## subcategoryRobotics                                                           1.147e-02
+## subcategoryScientific Communication and Education                             1.849e-02
+## subcategorySexual and Reproductive Health                                     5.210e-02
+## subcategorySignal Processing                                                  1.142e-02
+## subcategorySocial and Information Networks                                    1.182e-02
+## subcategorySoft Condensed Matter                                              1.177e-02
+## subcategorySoftware Engineering                                               1.237e-02
+## subcategorySolar and Stellar Astrophysics                                     1.117e-02
+## subcategorySound                                                              1.245e-02
+## subcategorySpace Physics                                                      1.472e-02
+## subcategorySpectral Theory                                                    1.530e-02
+## subcategorySports Medicine                                                    6.638e-02
+## subcategoryStatistical Finance                                                2.355e-02
+## subcategoryStatistical Mechanics                                              1.161e-02
+## subcategoryStatistics Theory                                                  1.163e-02
+## subcategoryStrongly Correlated Electrons                                      1.132e-02
+## subcategorySubcellular Processes                                              2.739e-02
+## subcategorySuperconductivity                                                  1.186e-02
+## subcategorySurgery                                                            5.373e-02
+## subcategorySymbolic Computation                                               2.229e-02
+## subcategorySymplectic Geometry                                                1.633e-02
+## subcategorySynthetic Biology                                                  1.541e-02
+## subcategorySystems and Control                                                1.131e-02
+## subcategorySystems Biology                                                    1.239e-02
+## subcategoryTheoretical Economics                                              2.622e-02
+## subcategoryTissues and Organs                                                 1.810e-02
+## subcategoryTrading and Market Microstructure                                  3.992e-02
+## subcategoryZoology                                                            2.140e-02
+##                                                                              t value
+## (Intercept)                                                                   79.875
+## month                                                                        -13.264
+## covidTRUE                                                                      2.453
+## covidpaperTRUE                                                                 3.780
+## subcategoryAdaptation and Self-Organizing Systems                             -1.037
+## subcategoryAddiction Medicine                                                 -3.969
+## subcategoryAlgebraic Geometry                                                  0.912
+## subcategoryAlgebraic Topology                                                  0.686
+## subcategoryAllergy and Immunology                                             -8.166
+## subcategoryAnalysis of PDEs                                                   -1.585
+## subcategoryAnimal Behavior and Cognition                                     -12.546
+## subcategoryApplications                                                       -6.209
+## subcategoryApplied Physics                                                    -1.749
+## subcategoryArtificial Intelligence                                            -1.320
+## subcategoryAstrophysics of Galaxies                                           -8.284
+## subcategoryAtmospheric and Oceanic Physics                                    -0.513
+## subcategoryAtomic and Molecular Clusters                                      -1.156
+## subcategoryAtomic Physics                                                      1.452
+## subcategoryAudio and Speech Processing                                        -1.476
+## subcategoryBiochemistry                                                      -14.061
+## subcategoryBioengineering                                                    -10.655
+## subcategoryBioinformatics                                                     -9.998
+## subcategoryBiological Physics                                                 -3.511
+## subcategoryBiomolecules                                                       -4.656
+## subcategoryBiophysics                                                         -8.741
+## subcategoryCancer Biology                                                    -18.598
+## subcategoryCardiovascular Medicine                                            -5.406
+## subcategoryCategory Theory                                                     0.363
+## subcategoryCell Behavior                                                      -5.240
+## subcategoryCell Biology                                                      -19.118
+## subcategoryChaotic Dynamics                                                    1.017
+## subcategoryChemical Physics                                                   -0.907
+## subcategoryClassical Analysis and ODEs                                        -0.336
+## subcategoryClassical Physics                                                   1.322
+## subcategoryClinical Trials                                                    -8.120
+## subcategoryCombinatorics                                                      -2.368
+## subcategoryCommutative Algebra                                                -1.797
+## subcategoryComplex Variables                                                   1.740
+## subcategoryComputation                                                         0.852
+## subcategoryComputation and Language                                           -5.016
+## subcategoryComputational Complexity                                            2.675
+## subcategoryComputational Engineering, Finance, and Science                     0.796
+## subcategoryComputational Finance                                               1.481
+## subcategoryComputational Geometry                                              1.383
+## subcategoryComputational Physics                                               2.127
+## subcategoryComputer Science and Game Theory                                   -0.007
+## subcategoryComputer Vision and Pattern Recognition                            -2.498
+## subcategoryComputers and Society                                              -6.667
+## subcategoryCosmology and Nongalactic Astrophysics                             -2.928
+## subcategoryCryptography and Security                                          -0.727
+## subcategoryData Analysis, Statistics and Probability                           0.375
+## subcategoryData Structures and Algorithms                                      1.727
+## subcategoryDatabases                                                          -1.932
+## subcategoryDermatology                                                        -2.022
+## subcategoryDevelopmental Biology                                             -19.785
+## subcategoryDifferential Geometry                                               1.209
+## subcategoryDigital Libraries                                                  -4.412
+## subcategoryDiscrete Mathematics                                                1.055
+## subcategoryDisordered Systems and Neural Networks                              0.791
+## subcategoryDistributed, Parallel, and Cluster Computing                        0.585
+## subcategoryDynamical Systems                                                  -1.041
+## subcategoryEarth and Planetary Astrophysics                                   -5.490
+## subcategoryEcology                                                           -13.827
+## subcategoryEconometrics                                                        1.794
+## subcategoryEconomics                                                          -0.282
+## subcategoryEmergency Medicine                                                 -3.642
+## subcategoryEmerging Technologies                                               1.303
+## subcategoryEndocrinology (including Diabetes Mellitus and Metabolic Disease)  -7.202
+## subcategoryEpidemiology                                                      -16.168
+## subcategoryEvolutionary Biology                                              -13.366
+## subcategoryExactly Solvable and Integrable Systems                            -0.178
+## subcategoryFluid Dynamics                                                      2.300
+## subcategoryFormal Languages and Automata Theory                                0.146
+## subcategoryFunctional Analysis                                                 0.499
+## subcategoryGastroenterology                                                   -7.961
+## subcategoryGeneral Economics                                                  -0.732
+## subcategoryGeneral Finance                                                     0.656
+## subcategoryGeneral Literature                                                  0.570
+## subcategoryGeneral Mathematics                                                 0.672
+## subcategoryGeneral Physics                                                     1.593
+## subcategoryGeneral Relativity and Quantum Cosmology                            0.980
+## subcategoryGeneral Topology                                                    0.848
+## subcategoryGenetic and Genomic Medicine                                      -17.719
+## subcategoryGenetics                                                          -18.832
+## subcategoryGenomics                                                          -15.974
+## subcategoryGeometric Topology                                                 -1.878
+## subcategoryGeophysics                                                         -0.614
+## subcategoryGeriatric Medicine                                                 -0.567
+## subcategoryGraphics                                                            0.453
+## subcategoryGroup Theory                                                        0.384
+## subcategoryHardware Architecture                                               1.436
+## subcategoryHealth Economics                                                   -0.874
+## subcategoryHealth Informatics                                                 -7.231
+## subcategoryHealth Policy                                                      -5.809
+## subcategoryHealth Systems and Quality Improvement                             -7.129
+## subcategoryHematology                                                         -2.298
+## subcategoryHigh Energy Astrophysical Phenomena                                -3.677
+## subcategoryHigh Energy Physics - Experiment                                   -2.042
+## subcategoryHigh Energy Physics - Lattice                                       4.065
+## subcategoryHigh Energy Physics - Phenomenology                                -0.037
+## subcategoryHigh Energy Physics - Theory                                        3.404
+## subcategoryHistory and Overview                                               -0.941
+## subcategoryHistory and Philosophy of Physics                                   1.587
+## subcategoryHIV/AIDS                                                           -6.614
+## subcategoryHuman-Computer Interaction                                         -6.549
+## subcategoryImage and Video Processing                                         -2.833
+## subcategoryImmunology                                                        -20.303
+## subcategoryInfectious Diseases (except HIV/AIDS)                             -17.895
+## subcategoryInformation Retrieval                                              -4.168
+## subcategoryInformation Theory                                                 -1.891
+## subcategoryInstrumentation and Detectors                                      -0.404
+## subcategoryInstrumentation and Methods for Astrophysics                       -3.266
+## subcategoryIntensive Care and Critical Care Medicine                          -5.935
+## subcategoryK-Theory and Homology                                               2.419
+## subcategoryLogic                                                               3.388
+## subcategoryLogic in Computer Science                                           2.032
+## subcategoryMachine Learning                                                   -0.345
+## subcategoryMaterials Science                                                  -2.264
+## subcategoryMathematical Finance                                               -0.711
+## subcategoryMathematical Physics                                                3.606
+## subcategoryMathematical Software                                               1.876
+## subcategoryMedical Education                                                  -1.140
+## subcategoryMedical Physics                                                    -4.060
+## subcategoryMesoscale and Nanoscale Physics                                     0.486
+## subcategoryMethodology                                                        -4.194
+## subcategoryMetric Geometry                                                     1.734
+## subcategoryMicrobiology                                                      -19.763
+## subcategoryMolecular Biology                                                 -17.253
+## subcategoryMolecular Networks                                                 -4.139
+## subcategoryMultiagent Systems                                                  0.158
+## subcategoryMultimedia                                                         -2.943
+## subcategoryNephrology                                                         -5.127
+## subcategoryNetworking and Internet Architecture                               -0.579
+## subcategoryNeural and Evolutionary Computing                                   1.028
+## subcategoryNeurology                                                         -11.418
+## subcategoryNeurons and Cognition                                              -5.296
+## subcategoryNeuroscience                                                      -16.325
+## subcategoryNuclear Experiment                                                 -2.045
+## subcategoryNuclear Theory                                                     -0.393
+## subcategoryNumber Theory                                                       1.541
+## subcategoryNumerical Analysis                                                 -0.348
+## subcategoryNutrition                                                          -5.182
+## subcategoryObstetrics and Gynecology                                          -7.002
+## subcategoryOccupational and Environmental Health                              -8.060
+## subcategoryOncology                                                          -13.354
+## subcategoryOperating Systems                                                   1.169
+## subcategoryOperator Algebras                                                   1.868
+## subcategoryOphthalmology                                                      -5.315
+## subcategoryOptics                                                              0.124
+## subcategoryOptimization and Control                                            0.244
+## subcategoryOther Computer Science                                             -0.800
+## subcategoryOther Condensed Matter                                              1.376
+## subcategoryOther Quantitative Biology                                         -1.987
+## subcategoryOther Statistics                                                   -2.411
+## subcategoryOtolaryngology                                                     -1.277
+## subcategoryPaleontology                                                       -0.555
+## subcategoryPathology                                                         -15.068
+## subcategoryPattern Formation and Solitons                                      0.685
+## subcategoryPediatrics                                                         -8.973
+## subcategoryPerformance                                                         0.867
+## subcategoryPharmacology and Therapeutics                                      -6.060
+## subcategoryPharmacology and Toxicology                                       -13.895
+## subcategoryPhysics and Society                                                -2.094
+## subcategoryPhysics Education                                                  -7.175
+## subcategoryPhysiology                                                        -15.917
+## subcategoryPlant Biology                                                     -16.073
+## subcategoryPlasma Physics                                                      2.133
+## subcategoryPopular Physics                                                    -1.538
+## subcategoryPopulations and Evolution                                          -6.004
+## subcategoryPortfolio Management                                               -0.744
+## subcategoryPricing of Securities                                               1.933
+## subcategoryPrimary Care Research                                              -3.946
+## subcategoryProbability                                                         0.880
+## subcategoryProgramming Languages                                               2.412
+## subcategoryPsychiatry and Clinical Psychology                                -14.599
+## subcategoryPublic and Global Health                                          -16.114
+## subcategoryQuantitative Methods                                               -6.782
+## subcategoryQuantum Algebra                                                     0.128
+## subcategoryQuantum Gases                                                       1.819
+## subcategoryQuantum Physics                                                     1.347
+## subcategoryRadiology and Imaging                                              -7.776
+## subcategoryRehabilitation Medicine and Physical Therapy                       -8.635
+## subcategoryRepresentation Theory                                              -1.415
+## subcategoryRespiratory Medicine                                               -8.398
+## subcategoryRheumatology                                                       -8.919
+## subcategoryRings and Algebras                                                 -2.851
+## subcategoryRisk Management                                                    -0.315
+## subcategoryRobotics                                                            1.087
+## subcategoryScientific Communication and Education                            -15.053
+## subcategorySexual and Reproductive Health                                     -8.330
+## subcategorySignal Processing                                                  -1.714
+## subcategorySocial and Information Networks                                    -3.654
+## subcategorySoft Condensed Matter                                              -1.611
+## subcategorySoftware Engineering                                               -0.231
+## subcategorySolar and Stellar Astrophysics                                     -6.299
+## subcategorySound                                                              -1.083
+## subcategorySpace Physics                                                      -1.663
+## subcategorySpectral Theory                                                     0.425
+## subcategorySports Medicine                                                    -0.963
+## subcategoryStatistical Finance                                                 0.138
+## subcategoryStatistical Mechanics                                               2.957
+## subcategoryStatistics Theory                                                  -2.216
+## subcategoryStrongly Correlated Electrons                                       1.322
+## subcategorySubcellular Processes                                              -2.781
+## subcategorySuperconductivity                                                  -0.075
+## subcategorySurgery                                                            -2.322
+## subcategorySymbolic Computation                                                1.042
+## subcategorySymplectic Geometry                                                 0.624
+## subcategorySynthetic Biology                                                  -9.275
+## subcategorySystems and Control                                                 1.211
+## subcategorySystems Biology                                                   -10.468
+## subcategoryTheoretical Economics                                               1.957
+## subcategoryTissues and Organs                                                 -7.250
+## subcategoryTrading and Market Microstructure                                   0.810
+## subcategoryZoology                                                            -7.759
+##                                                                              Pr(>|t|)
+## (Intercept)                                                                   < 2e-16
+## month                                                                         < 2e-16
+## covidTRUE                                                                    0.014213
+## covidpaperTRUE                                                               0.000160
+## subcategoryAdaptation and Self-Organizing Systems                            0.299895
+## subcategoryAddiction Medicine                                                7.36e-05
+## subcategoryAlgebraic Geometry                                                0.361774
+## subcategoryAlgebraic Topology                                                0.493010
+## subcategoryAllergy and Immunology                                            4.49e-16
+## subcategoryAnalysis of PDEs                                                  0.112980
+## subcategoryAnimal Behavior and Cognition                                      < 2e-16
+## subcategoryApplications                                                      5.99e-10
+## subcategoryApplied Physics                                                   0.080352
+## subcategoryArtificial Intelligence                                           0.186779
+## subcategoryAstrophysics of Galaxies                                           < 2e-16
+## subcategoryAtmospheric and Oceanic Physics                                   0.607793
+## subcategoryAtomic and Molecular Clusters                                     0.247739
+## subcategoryAtomic Physics                                                    0.146662
+## subcategoryAudio and Speech Processing                                       0.140088
+## subcategoryBiochemistry                                                       < 2e-16
+## subcategoryBioengineering                                                     < 2e-16
+## subcategoryBioinformatics                                                     < 2e-16
+## subcategoryBiological Physics                                                0.000453
+## subcategoryBiomolecules                                                      3.36e-06
+## subcategoryBiophysics                                                         < 2e-16
+## subcategoryCancer Biology                                                     < 2e-16
+## subcategoryCardiovascular Medicine                                           6.92e-08
+## subcategoryCategory Theory                                                   0.716500
+## subcategoryCell Behavior                                                     1.71e-07
+## subcategoryCell Biology                                                       < 2e-16
+## subcategoryChaotic Dynamics                                                  0.309019
+## subcategoryChemical Physics                                                  0.364504
+## subcategoryClassical Analysis and ODEs                                       0.736946
+## subcategoryClassical Physics                                                 0.186375
+## subcategoryClinical Trials                                                   6.54e-16
+## subcategoryCombinatorics                                                     0.017946
+## subcategoryCommutative Algebra                                               0.072424
+## subcategoryComplex Variables                                                 0.081905
+## subcategoryComputation                                                       0.394535
+## subcategoryComputation and Language                                          5.55e-07
+## subcategoryComputational Complexity                                          0.007513
+## subcategoryComputational Engineering, Finance, and Science                   0.425918
+## subcategoryComputational Finance                                             0.138621
+## subcategoryComputational Geometry                                            0.166661
+## subcategoryComputational Physics                                             0.033490
+## subcategoryComputer Science and Game Theory                                  0.994376
+## subcategoryComputer Vision and Pattern Recognition                           0.012536
+## subcategoryComputers and Society                                             3.04e-11
+## subcategoryCosmology and Nongalactic Astrophysics                            0.003432
+## subcategoryCryptography and Security                                         0.467008
+## subcategoryData Analysis, Statistics and Probability                         0.707713
+## subcategoryData Structures and Algorithms                                    0.084177
+## subcategoryDatabases                                                         0.053402
+## subcategoryDermatology                                                       0.043304
+## subcategoryDevelopmental Biology                                              < 2e-16
+## subcategoryDifferential Geometry                                             0.226828
+## subcategoryDigital Libraries                                                 1.06e-05
+## subcategoryDiscrete Mathematics                                              0.291344
+## subcategoryDisordered Systems and Neural Networks                            0.428999
+## subcategoryDistributed, Parallel, and Cluster Computing                      0.558888
+## subcategoryDynamical Systems                                                 0.298109
+## subcategoryEarth and Planetary Astrophysics                                  4.32e-08
+## subcategoryEcology                                                            < 2e-16
+## subcategoryEconometrics                                                      0.072910
+## subcategoryEconomics                                                         0.777764
+## subcategoryEmergency Medicine                                                0.000275
+## subcategoryEmerging Technologies                                             0.192605
+## subcategoryEndocrinology (including Diabetes Mellitus and Metabolic Disease) 7.33e-13
+## subcategoryEpidemiology                                                       < 2e-16
+## subcategoryEvolutionary Biology                                               < 2e-16
+## subcategoryExactly Solvable and Integrable Systems                           0.858662
+## subcategoryFluid Dynamics                                                    0.021512
+## subcategoryFormal Languages and Automata Theory                              0.884055
+## subcategoryFunctional Analysis                                               0.617738
+## subcategoryGastroenterology                                                  2.34e-15
+## subcategoryGeneral Economics                                                 0.464499
+## subcategoryGeneral Finance                                                   0.512102
+## subcategoryGeneral Literature                                                0.568790
+## subcategoryGeneral Mathematics                                               0.501536
+## subcategoryGeneral Physics                                                   0.111175
+## subcategoryGeneral Relativity and Quantum Cosmology                          0.327303
+## subcategoryGeneral Topology                                                  0.396508
+## subcategoryGenetic and Genomic Medicine                                       < 2e-16
+## subcategoryGenetics                                                           < 2e-16
+## subcategoryGenomics                                                           < 2e-16
+## subcategoryGeometric Topology                                                0.060451
+## subcategoryGeophysics                                                        0.539405
+## subcategoryGeriatric Medicine                                                0.571089
+## subcategoryGraphics                                                          0.650666
+## subcategoryGroup Theory                                                      0.700705
+## subcategoryHardware Architecture                                             0.151066
+## subcategoryHealth Economics                                                  0.382394
+## subcategoryHealth Informatics                                                5.94e-13
+## subcategoryHealth Policy                                                     6.91e-09
+## subcategoryHealth Systems and Quality Improvement                            1.24e-12
+## subcategoryHematology                                                        0.021633
+## subcategoryHigh Energy Astrophysical Phenomena                               0.000240
+## subcategoryHigh Energy Physics - Experiment                                  0.041241
+## subcategoryHigh Energy Physics - Lattice                                     4.92e-05
+## subcategoryHigh Energy Physics - Phenomenology                               0.970440
+## subcategoryHigh Energy Physics - Theory                                      0.000672
+## subcategoryHistory and Overview                                              0.346882
+## subcategoryHistory and Philosophy of Physics                                 0.112549
+## subcategoryHIV/AIDS                                                          4.37e-11
+## subcategoryHuman-Computer Interaction                                        6.69e-11
+## subcategoryImage and Video Processing                                        0.004633
+## subcategoryImmunology                                                         < 2e-16
+## subcategoryInfectious Diseases (except HIV/AIDS)                              < 2e-16
+## subcategoryInformation Retrieval                                             3.16e-05
+## subcategoryInformation Theory                                                0.058676
+## subcategoryInstrumentation and Detectors                                     0.685982
+## subcategoryInstrumentation and Methods for Astrophysics                      0.001103
+## subcategoryIntensive Care and Critical Care Medicine                         3.25e-09
+## subcategoryK-Theory and Homology                                             0.015611
+## subcategoryLogic                                                             0.000711
+## subcategoryLogic in Computer Science                                         0.042274
+## subcategoryMachine Learning                                                  0.729747
+## subcategoryMaterials Science                                                 0.023644
+## subcategoryMathematical Finance                                              0.477257
+## subcategoryMathematical Physics                                              0.000316
+## subcategoryMathematical Software                                             0.060726
+## subcategoryMedical Education                                                 0.254456
+## subcategoryMedical Physics                                                   5.02e-05
+## subcategoryMesoscale and Nanoscale Physics                                   0.627104
+## subcategoryMethodology                                                       2.82e-05
+## subcategoryMetric Geometry                                                   0.082948
+## subcategoryMicrobiology                                                       < 2e-16
+## subcategoryMolecular Biology                                                  < 2e-16
+## subcategoryMolecular Networks                                                3.58e-05
+## subcategoryMultiagent Systems                                                0.874831
+## subcategoryMultimedia                                                        0.003271
+## subcategoryNephrology                                                        3.11e-07
+## subcategoryNetworking and Internet Architecture                              0.562673
+## subcategoryNeural and Evolutionary Computing                                 0.303995
+## subcategoryNeurology                                                          < 2e-16
+## subcategoryNeurons and Cognition                                             1.26e-07
+## subcategoryNeuroscience                                                       < 2e-16
+## subcategoryNuclear Experiment                                                0.040960
+## subcategoryNuclear Theory                                                    0.694268
+## subcategoryNumber Theory                                                     0.123343
+## subcategoryNumerical Analysis                                                0.727963
+## subcategoryNutrition                                                         2.33e-07
+## subcategoryObstetrics and Gynecology                                         3.05e-12
+## subcategoryOccupational and Environmental Health                             1.06e-15
+## subcategoryOncology                                                           < 2e-16
+## subcategoryOperating Systems                                                 0.242595
+## subcategoryOperator Algebras                                                 0.061797
+## subcategoryOphthalmology                                                     1.14e-07
+## subcategoryOptics                                                            0.900948
+## subcategoryOptimization and Control                                          0.807180
+## subcategoryOther Computer Science                                            0.423544
+## subcategoryOther Condensed Matter                                            0.168807
+## subcategoryOther Quantitative Biology                                        0.047058
+## subcategoryOther Statistics                                                  0.015975
+## subcategoryOtolaryngology                                                    0.201651
+## subcategoryPaleontology                                                      0.579127
+## subcategoryPathology                                                          < 2e-16
+## subcategoryPattern Formation and Solitons                                    0.493502
+## subcategoryPediatrics                                                         < 2e-16
+## subcategoryPerformance                                                       0.385805
+## subcategoryPharmacology and Therapeutics                                     1.52e-09
+## subcategoryPharmacology and Toxicology                                        < 2e-16
+## subcategoryPhysics and Society                                               0.036342
+## subcategoryPhysics Education                                                 8.94e-13
+## subcategoryPhysiology                                                         < 2e-16
+## subcategoryPlant Biology                                                      < 2e-16
+## subcategoryPlasma Physics                                                    0.032998
+## subcategoryPopular Physics                                                   0.124143
+## subcategoryPopulations and Evolution                                         2.14e-09
+## subcategoryPortfolio Management                                              0.456889
+## subcategoryPricing of Securities                                             0.053336
+## subcategoryPrimary Care Research                                             8.12e-05
+## subcategoryProbability                                                       0.378823
+## subcategoryProgramming Languages                                             0.015930
+## subcategoryPsychiatry and Clinical Psychology                                 < 2e-16
+## subcategoryPublic and Global Health                                           < 2e-16
+## subcategoryQuantitative Methods                                              1.40e-11
+## subcategoryQuantum Algebra                                                   0.897886
+## subcategoryQuantum Gases                                                     0.068931
+## subcategoryQuantum Physics                                                   0.178111
+## subcategoryRadiology and Imaging                                             9.95e-15
+## subcategoryRehabilitation Medicine and Physical Therapy                       < 2e-16
+## subcategoryRepresentation Theory                                             0.157190
+## subcategoryRespiratory Medicine                                               < 2e-16
+## subcategoryRheumatology                                                       < 2e-16
+## subcategoryRings and Algebras                                                0.004392
+## subcategoryRisk Management                                                   0.752963
+## subcategoryRobotics                                                          0.277191
+## subcategoryScientific Communication and Education                             < 2e-16
+## subcategorySexual and Reproductive Health                                     < 2e-16
+## subcategorySignal Processing                                                 0.086590
+## subcategorySocial and Information Networks                                   0.000262
+## subcategorySoft Condensed Matter                                             0.107383
+## subcategorySoftware Engineering                                              0.817538
+## subcategorySolar and Stellar Astrophysics                                    3.39e-10
+## subcategorySound                                                             0.278846
+## subcategorySpace Physics                                                     0.096390
+## subcategorySpectral Theory                                                   0.670797
+## subcategorySports Medicine                                                   0.335706
+## subcategoryStatistical Finance                                               0.889995
+## subcategoryStatistical Mechanics                                             0.003133
+## subcategoryStatistics Theory                                                 0.026785
+## subcategoryStrongly Correlated Electrons                                     0.186173
+## subcategorySubcellular Processes                                             0.005453
+## subcategorySuperconductivity                                                 0.940437
+## subcategorySurgery                                                           0.020302
+## subcategorySymbolic Computation                                              0.297619
+## subcategorySymplectic Geometry                                               0.532848
+## subcategorySynthetic Biology                                                  < 2e-16
+## subcategorySystems and Control                                               0.225895
+## subcategorySystems Biology                                                    < 2e-16
+## subcategoryTheoretical Economics                                             0.050490
+## subcategoryTissues and Organs                                                5.19e-13
+## subcategoryTrading and Market Microstructure                                 0.418246
+## subcategoryZoology                                                           1.14e-14
+##                                                                                 
+## (Intercept)                                                                  ***
+## month                                                                        ***
+## covidTRUE                                                                    *  
+## covidpaperTRUE                                                               ***
+## subcategoryAdaptation and Self-Organizing Systems                               
+## subcategoryAddiction Medicine                                                ***
+## subcategoryAlgebraic Geometry                                                   
+## subcategoryAlgebraic Topology                                                   
+## subcategoryAllergy and Immunology                                            ***
+## subcategoryAnalysis of PDEs                                                     
+## subcategoryAnimal Behavior and Cognition                                     ***
+## subcategoryApplications                                                      ***
+## subcategoryApplied Physics                                                   .  
+## subcategoryArtificial Intelligence                                              
+## subcategoryAstrophysics of Galaxies                                          ***
+## subcategoryAtmospheric and Oceanic Physics                                      
+## subcategoryAtomic and Molecular Clusters                                        
+## subcategoryAtomic Physics                                                       
+## subcategoryAudio and Speech Processing                                          
+## subcategoryBiochemistry                                                      ***
+## subcategoryBioengineering                                                    ***
+## subcategoryBioinformatics                                                    ***
+## subcategoryBiological Physics                                                ***
+## subcategoryBiomolecules                                                      ***
+## subcategoryBiophysics                                                        ***
+## subcategoryCancer Biology                                                    ***
+## subcategoryCardiovascular Medicine                                           ***
+## subcategoryCategory Theory                                                      
+## subcategoryCell Behavior                                                     ***
+## subcategoryCell Biology                                                      ***
+## subcategoryChaotic Dynamics                                                     
+## subcategoryChemical Physics                                                     
+## subcategoryClassical Analysis and ODEs                                          
+## subcategoryClassical Physics                                                    
+## subcategoryClinical Trials                                                   ***
+## subcategoryCombinatorics                                                     *  
+## subcategoryCommutative Algebra                                               .  
+## subcategoryComplex Variables                                                 .  
+## subcategoryComputation                                                          
+## subcategoryComputation and Language                                          ***
+## subcategoryComputational Complexity                                          ** 
+## subcategoryComputational Engineering, Finance, and Science                      
+## subcategoryComputational Finance                                                
+## subcategoryComputational Geometry                                               
+## subcategoryComputational Physics                                             *  
+## subcategoryComputer Science and Game Theory                                     
+## subcategoryComputer Vision and Pattern Recognition                           *  
+## subcategoryComputers and Society                                             ***
+## subcategoryCosmology and Nongalactic Astrophysics                            ** 
+## subcategoryCryptography and Security                                            
+## subcategoryData Analysis, Statistics and Probability                            
+## subcategoryData Structures and Algorithms                                    .  
+## subcategoryDatabases                                                         .  
+## subcategoryDermatology                                                       *  
+## subcategoryDevelopmental Biology                                             ***
+## subcategoryDifferential Geometry                                                
+## subcategoryDigital Libraries                                                 ***
+## subcategoryDiscrete Mathematics                                                 
+## subcategoryDisordered Systems and Neural Networks                               
+## subcategoryDistributed, Parallel, and Cluster Computing                         
+## subcategoryDynamical Systems                                                    
+## subcategoryEarth and Planetary Astrophysics                                  ***
+## subcategoryEcology                                                           ***
+## subcategoryEconometrics                                                      .  
+## subcategoryEconomics                                                            
+## subcategoryEmergency Medicine                                                ***
+## subcategoryEmerging Technologies                                                
+## subcategoryEndocrinology (including Diabetes Mellitus and Metabolic Disease) ***
+## subcategoryEpidemiology                                                      ***
+## subcategoryEvolutionary Biology                                              ***
+## subcategoryExactly Solvable and Integrable Systems                              
+## subcategoryFluid Dynamics                                                    *  
+## subcategoryFormal Languages and Automata Theory                                 
+## subcategoryFunctional Analysis                                                  
+## subcategoryGastroenterology                                                  ***
+## subcategoryGeneral Economics                                                    
+## subcategoryGeneral Finance                                                      
+## subcategoryGeneral Literature                                                   
+## subcategoryGeneral Mathematics                                                  
+## subcategoryGeneral Physics                                                      
+## subcategoryGeneral Relativity and Quantum Cosmology                             
+## subcategoryGeneral Topology                                                     
+## subcategoryGenetic and Genomic Medicine                                      ***
+## subcategoryGenetics                                                          ***
+## subcategoryGenomics                                                          ***
+## subcategoryGeometric Topology                                                .  
+## subcategoryGeophysics                                                           
+## subcategoryGeriatric Medicine                                                   
+## subcategoryGraphics                                                             
+## subcategoryGroup Theory                                                         
+## subcategoryHardware Architecture                                                
+## subcategoryHealth Economics                                                     
+## subcategoryHealth Informatics                                                ***
+## subcategoryHealth Policy                                                     ***
+## subcategoryHealth Systems and Quality Improvement                            ***
+## subcategoryHematology                                                        *  
+## subcategoryHigh Energy Astrophysical Phenomena                               ***
+## subcategoryHigh Energy Physics - Experiment                                  *  
+## subcategoryHigh Energy Physics - Lattice                                     ***
+## subcategoryHigh Energy Physics - Phenomenology                                  
+## subcategoryHigh Energy Physics - Theory                                      ***
+## subcategoryHistory and Overview                                                 
+## subcategoryHistory and Philosophy of Physics                                    
+## subcategoryHIV/AIDS                                                          ***
+## subcategoryHuman-Computer Interaction                                        ***
+## subcategoryImage and Video Processing                                        ** 
+## subcategoryImmunology                                                        ***
+## subcategoryInfectious Diseases (except HIV/AIDS)                             ***
+## subcategoryInformation Retrieval                                             ***
+## subcategoryInformation Theory                                                .  
+## subcategoryInstrumentation and Detectors                                        
+## subcategoryInstrumentation and Methods for Astrophysics                      ** 
+## subcategoryIntensive Care and Critical Care Medicine                         ***
+## subcategoryK-Theory and Homology                                             *  
+## subcategoryLogic                                                             ***
+## subcategoryLogic in Computer Science                                         *  
+## subcategoryMachine Learning                                                     
+## subcategoryMaterials Science                                                 *  
+## subcategoryMathematical Finance                                                 
+## subcategoryMathematical Physics                                              ***
+## subcategoryMathematical Software                                             .  
+## subcategoryMedical Education                                                    
+## subcategoryMedical Physics                                                   ***
+## subcategoryMesoscale and Nanoscale Physics                                      
+## subcategoryMethodology                                                       ***
+## subcategoryMetric Geometry                                                   .  
+## subcategoryMicrobiology                                                      ***
+## subcategoryMolecular Biology                                                 ***
+## subcategoryMolecular Networks                                                ***
+## subcategoryMultiagent Systems                                                   
+## subcategoryMultimedia                                                        ** 
+## subcategoryNephrology                                                        ***
+## subcategoryNetworking and Internet Architecture                                 
+## subcategoryNeural and Evolutionary Computing                                    
+## subcategoryNeurology                                                         ***
+## subcategoryNeurons and Cognition                                             ***
+## subcategoryNeuroscience                                                      ***
+## subcategoryNuclear Experiment                                                *  
+## subcategoryNuclear Theory                                                       
+## subcategoryNumber Theory                                                        
+## subcategoryNumerical Analysis                                                   
+## subcategoryNutrition                                                         ***
+## subcategoryObstetrics and Gynecology                                         ***
+## subcategoryOccupational and Environmental Health                             ***
+## subcategoryOncology                                                          ***
+## subcategoryOperating Systems                                                    
+## subcategoryOperator Algebras                                                 .  
+## subcategoryOphthalmology                                                     ***
+## subcategoryOptics                                                               
+## subcategoryOptimization and Control                                             
+## subcategoryOther Computer Science                                               
+## subcategoryOther Condensed Matter                                               
+## subcategoryOther Quantitative Biology                                        *  
+## subcategoryOther Statistics                                                  *  
+## subcategoryOtolaryngology                                                       
+## subcategoryPaleontology                                                         
+## subcategoryPathology                                                         ***
+## subcategoryPattern Formation and Solitons                                       
+## subcategoryPediatrics                                                        ***
+## subcategoryPerformance                                                          
+## subcategoryPharmacology and Therapeutics                                     ***
+## subcategoryPharmacology and Toxicology                                       ***
+## subcategoryPhysics and Society                                               *  
+## subcategoryPhysics Education                                                 ***
+## subcategoryPhysiology                                                        ***
+## subcategoryPlant Biology                                                     ***
+## subcategoryPlasma Physics                                                    *  
+## subcategoryPopular Physics                                                      
+## subcategoryPopulations and Evolution                                         ***
+## subcategoryPortfolio Management                                                 
+## subcategoryPricing of Securities                                             .  
+## subcategoryPrimary Care Research                                             ***
+## subcategoryProbability                                                          
+## subcategoryProgramming Languages                                             *  
+## subcategoryPsychiatry and Clinical Psychology                                ***
+## subcategoryPublic and Global Health                                          ***
+## subcategoryQuantitative Methods                                              ***
+## subcategoryQuantum Algebra                                                      
+## subcategoryQuantum Gases                                                     .  
+## subcategoryQuantum Physics                                                      
+## subcategoryRadiology and Imaging                                             ***
+## subcategoryRehabilitation Medicine and Physical Therapy                      ***
+## subcategoryRepresentation Theory                                                
+## subcategoryRespiratory Medicine                                              ***
+## subcategoryRheumatology                                                      ***
+## subcategoryRings and Algebras                                                ** 
+## subcategoryRisk Management                                                      
+## subcategoryRobotics                                                             
+## subcategoryScientific Communication and Education                            ***
+## subcategorySexual and Reproductive Health                                    ***
+## subcategorySignal Processing                                                 .  
+## subcategorySocial and Information Networks                                   ***
+## subcategorySoft Condensed Matter                                                
+## subcategorySoftware Engineering                                                 
+## subcategorySolar and Stellar Astrophysics                                    ***
+## subcategorySound                                                                
+## subcategorySpace Physics                                                     .  
+## subcategorySpectral Theory                                                      
+## subcategorySports Medicine                                                      
+## subcategoryStatistical Finance                                                  
+## subcategoryStatistical Mechanics                                             ** 
+## subcategoryStatistics Theory                                                 *  
+## subcategoryStrongly Correlated Electrons                                        
+## subcategorySubcellular Processes                                             ** 
+## subcategorySuperconductivity                                                    
+## subcategorySurgery                                                           *  
+## subcategorySymbolic Computation                                                 
+## subcategorySymplectic Geometry                                                  
+## subcategorySynthetic Biology                                                 ***
+## subcategorySystems and Control                                                  
+## subcategorySystems Biology                                                   ***
+## subcategoryTheoretical Economics                                             .  
+## subcategoryTissues and Organs                                                ***
+## subcategoryTrading and Market Microstructure                                    
+## subcategoryZoology                                                           ***
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+## 
+## Residual standard error: 0.4939 on 3258 degrees of freedom
+## Multiple R-squared:  0.9228, Adjusted R-squared:  0.9177 
+## F-statistic:   182 on 214 and 3258 DF,  p-value: < 2.2e-16
+

El intercept está mucho mejor (0.85), y covid y covidpaper siguen saliendo positivos.

+
+
+

2.1.2 lmer

+

Vamos directamente con las subcategorías a partir de ahora.

+
# Linear mixed-effects model for r_male (a proportion, per the accompanying
# text) with fixed effects month, covid and covidpaper, and random intercepts
# for category and for subcategory nested within category
# (`category/subcategory` expands to category + subcategory:category).
# Rows of df.agg are weighted by `total` (presumably the per-row count
# behind each proportion; confirm against where df.agg is built).
fit_lmer <- lmer(
+  r_male ~ month + covid + covidpaper + (1 | category/subcategory),
+  df.agg, weights=total)
+

Ojo a esto:

+
# model_performance() is a helper defined outside this excerpt; the rendered
# output shows a geom_smooth message and a pseudo-R2 value, so it presumably
# draws a ggplot diagnostic and reports a pseudo-R2 for the fit.
model_performance(fit_lmer)
+
## `geom_smooth()` using formula 'y ~ x'
+

+
## pseudo-R2 = 0.7902072
+

Nice! Y los residuales:

+
# model_assumptions() is a helper defined outside this excerpt; per the
# surrounding text it is used to inspect the residuals (the geom_smooth
# message in the output suggests it produces a ggplot).
model_assumptions(fit_lmer)
+
## `geom_smooth()` using formula 'y ~ x'
+

+

Pues siguen desviándose de la normal en las colas. Esto realmente es esperable, porque estamos con una proporción.

+
# Print the mixed-model summary: REML criterion, scaled residuals,
# random-effect variances and fixed-effect estimates.
summary(fit_lmer)
+
## Linear mixed model fit by REML ['lmerMod']
+## Formula: r_male ~ month + covid + covidpaper + (1 | category/subcategory)
+##    Data: df.agg
+## Weights: total
+## 
+## REML criterion at convergence: -13318.4
+## 
+## Scaled residuals: 
+##     Min      1Q  Median      3Q     Max 
+## -4.4942 -0.6234  0.0511  0.6869  5.7916 
+## 
+## Random effects:
+##  Groups               Name        Variance Std.Dev.
+##  subcategory:category (Intercept) 0.001467 0.0383  
+##  category             (Intercept) 0.006905 0.0831  
+##  Residual                         0.245707 0.4957  
+## Number of obs: 3473, groups:  subcategory:category, 219; category, 8
+## 
+## Fixed effects:
+##                  Estimate Std. Error t value
+## (Intercept)     8.072e-01  2.967e-02  27.209
+## month          -5.309e-04  3.974e-05 -13.359
+## covidTRUE       2.858e-03  1.142e-03   2.502
+## covidpaperTRUE  1.426e-02  3.436e-03   4.149
+## 
+## Correlation of Fixed Effects:
+##             (Intr) month  covdTRUE
+## month       -0.029                
+## covidTRUE    0.010 -0.621         
+## covdpprTRUE -0.006  0.004 -0.117
+

Intercept de 0.81, que está muy bien. Variables covid y covidpaper positivas (spoiler: salen siempre positivas). Lo bueno del modelo de efectos mixtos es que, si le cambias subcategorías por categoría, aunque el ajuste sale peor (como en el modelo lineal), el intercept es estable: sale lo mismo.

+
+
+
+

2.2 Binomial, un poco odd

+

Dado que la variable respuesta va de 0 a 1 de forma continua, lo suyo es un modelo binomial fraccional (binomial más los pesos de los counts).

+
+

2.2.1 glm

+

Primero lo primero:

+
# Binomial GLM (default logit link) on the proportion r_male, with `total`
# passed as weights: the "fractional binomial" setup described in the text.
# For a proportion response, glm() interprets the weights as the number of
# trials behind each proportion. Here subcategory enters as a fixed-effect
# factor rather than as a random effect.
fit_glm <- glm(
+  r_male ~ month + covid + covidpaper + subcategory,
+  df.agg, family=binomial, weights=total)
+

Que nos da:

+
# model_performance() is a helper defined outside this excerpt; the rendered
# output shows a geom_smooth message and a pseudo-R2 value, so it presumably
# draws a ggplot diagnostic and reports a pseudo-R2 for the fit.
model_performance(fit_glm)
+
## `geom_smooth()` using formula 'y ~ x'
+

+
## pseudo-R2 = 0.8021809
+

Muy bien, similar a lo anterior, pero ahora:

+
# model_assumptions() is a helper defined outside this excerpt; per the
# surrounding text it is used to inspect the residuals (the geom_smooth
# message in the output suggests it produces a ggplot).
model_assumptions(fit_glm)
+
## `geom_smooth()` using formula 'y ~ x'
+

+

Espectaculares residuales. No habéis visto unos residuales así ni en los ejercicios de clase. Finalmente, el chorizo:

+
# Print the GLM summary: deviance residuals and the coefficient table
# (estimates are on the logit scale, one row per subcategory level).
summary(fit_glm)
+
## 
+## Call:
+## glm(formula = r_male ~ month + covid + covidpaper + subcategory, 
+##     family = binomial, data = df.agg, weights = total)
+## 
+## Deviance Residuals: 
+##     Min       1Q   Median       3Q      Max  
+## -5.4290  -0.7889   0.0597   0.8886   6.2448  
+## 
+## Coefficients:
+##                                                                                Estimate
+## (Intercept)                                                                   1.7906290
+## month                                                                        -0.0035878
+## covidTRUE                                                                     0.0201486
+## covidpaperTRUE                                                                0.0686645
+## subcategoryAdaptation and Self-Organizing Systems                            -0.1210853
+## subcategoryAddiction Medicine                                                -1.2245290
+## subcategoryAlgebraic Geometry                                                 0.0860267
+## subcategoryAlgebraic Topology                                                 0.0741083
+## subcategoryAllergy and Immunology                                            -1.3312728
+## subcategoryAnalysis of PDEs                                                  -0.1331888
+## subcategoryAnimal Behavior and Cognition                                     -1.0295705
+## subcategoryApplications                                                      -0.4889835
+## subcategoryApplied Physics                                                   -0.1469708
+## subcategoryArtificial Intelligence                                           -0.1099846
+## subcategoryAstrophysics of Galaxies                                          -0.5812649
+## subcategoryAtmospheric and Oceanic Physics                                   -0.0600170
+## subcategoryAtomic and Molecular Clusters                                     -0.1662142
+## subcategoryAtomic Physics                                                     0.1441790
+## subcategoryAudio and Speech Processing                                       -0.1360033
+## subcategoryBiochemistry                                                      -0.9485172
+## subcategoryBioengineering                                                    -0.7951614
+## subcategoryBioinformatics                                                    -0.6942248
+## subcategoryBiological Physics                                                -0.3071894
+## subcategoryBiomolecules                                                      -0.4995666
+## subcategoryBiophysics                                                        -0.6512642
+## subcategoryCancer Biology                                                    -1.1587973
+## subcategoryCardiovascular Medicine                                           -0.6514228
+## subcategoryCategory Theory                                                    0.0446107
+## subcategoryCell Behavior                                                     -0.6345724
+## subcategoryCell Biology                                                      -1.1781338
+## subcategoryChaotic Dynamics                                                   0.1224820
+## subcategoryChemical Physics                                                  -0.0809671
+## subcategoryClassical Analysis and ODEs                                       -0.0324691
+## subcategoryClassical Physics                                                  0.1702294
+## subcategoryClinical Trials                                                   -1.3085907
+## subcategoryCombinatorics                                                     -0.1946686
+## subcategoryCommutative Algebra                                               -0.1879465
+## subcategoryComplex Variables                                                  0.1990709
+## subcategoryComputation                                                        0.0923011
+## subcategoryComputation and Language                                          -0.3787754
+## subcategoryComputational Complexity                                           0.3197098
+## subcategoryComputational Engineering, Finance, and Science                    0.0930251
+## subcategoryComputational Finance                                              0.3256898
+## subcategoryComputational Geometry                                             0.1618636
+## subcategoryComputational Physics                                              0.2057750
+## subcategoryComputer Science and Game Theory                                  -0.0006528
+## subcategoryComputer Vision and Pattern Recognition                           -0.1962711
+## subcategoryComputers and Society                                             -0.5180530
+## subcategoryCosmology and Nongalactic Astrophysics                            -0.2335422
+## subcategoryCryptography and Security                                         -0.0639659
+## subcategoryData Analysis, Statistics and Probability                          0.0407120
+## subcategoryData Structures and Algorithms                                     0.1676698
+## subcategoryDatabases                                                         -0.1828150
+## subcategoryDermatology                                                       -0.9000102
+## subcategoryDevelopmental Biology                                             -1.2718727
+## subcategoryDifferential Geometry                                              0.1180697
+## subcategoryDigital Libraries                                                 -0.4591017
+## subcategoryDiscrete Mathematics                                               0.1118695
+## subcategoryDisordered Systems and Neural Networks                             0.0813752
+## subcategoryDistributed, Parallel, and Cluster Computing                       0.0527612
+## subcategoryDynamical Systems                                                 -0.0926232
+## subcategoryEarth and Planetary Astrophysics                                  -0.4155257
+## subcategoryEcology                                                           -0.9420202
+## subcategoryEconometrics                                                       0.2666873
+## subcategoryEconomics                                                         -0.0404329
+## subcategoryEmergency Medicine                                                -0.7892360
+## subcategoryEmerging Technologies                                              0.1546881
+## subcategoryEndocrinology (including Diabetes Mellitus and Metabolic Disease) -1.2240066
+## subcategoryEpidemiology                                                      -1.0550866
+## subcategoryEvolutionary Biology                                              -0.9117578
+## subcategoryExactly Solvable and Integrable Systems                           -0.0222482
+## subcategoryFluid Dynamics                                                     0.2298854
+## subcategoryFormal Languages and Automata Theory                               0.0178850
+## subcategoryFunctional Analysis                                                0.0484504
+## subcategoryGastroenterology                                                  -1.0872730
+## subcategoryGeneral Economics                                                 -0.1056557
+## subcategoryGeneral Finance                                                    0.1260228
+## subcategoryGeneral Literature                                                 0.3608829
+## subcategoryGeneral Mathematics                                                0.1857903
+## subcategoryGeneral Physics                                                    0.2789825
+## subcategoryGeneral Relativity and Quantum Cosmology                           0.0881830
+## subcategoryGeneral Topology                                                   0.1349127
+## subcategoryGenetic and Genomic Medicine                                      -1.3576512
+## subcategoryGenetics                                                          -1.1603148
+## subcategoryGenomics                                                          -1.0088927
+## subcategoryGeometric Topology                                                -0.1797397
+## subcategoryGeophysics                                                        -0.0672101
+## subcategoryGeriatric Medicine                                                -0.2695927
+## subcategoryGraphics                                                           0.0505288
+## subcategoryGroup Theory                                                       0.0396576
+## subcategoryHardware Architecture                                              0.1891817
+## subcategoryHealth Economics                                                  -0.2853155
+## subcategoryHealth Informatics                                                -0.8067880
+## subcategoryHealth Policy                                                     -1.0002238
+## subcategoryHealth Systems and Quality Improvement                            -1.1276784
+## subcategoryHematology                                                        -0.7994511
+## subcategoryHigh Energy Astrophysical Phenomena                               -0.2882461
+## subcategoryHigh Energy Physics - Experiment                                  -0.1732588
+## subcategoryHigh Energy Physics - Lattice                                      0.4866500
+## subcategoryHigh Energy Physics - Phenomenology                               -0.0028699
+## subcategoryHigh Energy Physics - Theory                                       0.3312903
+## subcategoryHistory and Overview                                              -0.1694207
+## subcategoryHistory and Philosophy of Physics                                  0.3050251
+## subcategoryHIV/AIDS                                                          -1.2938274
+## subcategoryHuman-Computer Interaction                                        -0.5170804
+## subcategoryImage and Video Processing                                        -0.2319061
+## subcategoryImmunology                                                        -1.2549246
+## subcategoryInfectious Diseases (except HIV/AIDS)                             -1.1548675
+## subcategoryInformation Retrieval                                             -0.3465199
+## subcategoryInformation Theory                                                -0.1523042
+## subcategoryInstrumentation and Detectors                                     -0.0359720
+## subcategoryInstrumentation and Methods for Astrophysics                      -0.2610586
+## subcategoryIntensive Care and Critical Care Medicine                         -0.7204437
+## subcategoryK-Theory and Homology                                              0.4058772
+## subcategoryLogic                                                              0.4312608
+## subcategoryLogic in Computer Science                                          0.2180962
+## subcategoryMachine Learning                                                  -0.0305112
+## subcategoryMaterials Science                                                 -0.1806757
+## subcategoryMathematical Finance                                              -0.1065532
+## subcategoryMathematical Physics                                               0.3523437
+## subcategoryMathematical Software                                              0.3394081
+## subcategoryMedical Education                                                 -0.5388001
+## subcategoryMedical Physics                                                   -0.3677675
+## subcategoryMesoscale and Nanoscale Physics                                    0.0423900
+## subcategoryMethodology                                                       -0.3417740
+## subcategoryMetric Geometry                                                    0.2123573
+## subcategoryMicrobiology                                                      -1.1879277
+## subcategoryMolecular Biology                                                 -1.1170126
+## subcategoryMolecular Networks                                                -0.4856762
+## subcategoryMultiagent Systems                                                 0.0149705
+## subcategoryMultimedia                                                        -0.2943841
+## subcategoryNephrology                                                        -1.2553441
+## subcategoryNetworking and Internet Architecture                              -0.0515179
+## subcategoryNeural and Evolutionary Computing                                  0.0984017
+## subcategoryNeurology                                                         -1.0304242
+## subcategoryNeurons and Cognition                                             -0.4540945
+## subcategoryNeuroscience                                                      -1.0074889
+## subcategoryNuclear Experiment                                                -0.1885058
+## subcategoryNuclear Theory                                                    -0.0357352
+## subcategoryNumber Theory                                                      0.1502925
+## subcategoryNumerical Analysis                                                -0.0314609
+## subcategoryNutrition                                                         -1.4512707
+## subcategoryObstetrics and Gynecology                                         -1.9751474
+## subcategoryOccupational and Environmental Health                             -1.1795888
+## subcategoryOncology                                                          -1.1736696
+## subcategoryOperating Systems                                                  0.3736746
+## subcategoryOperator Algebras                                                  0.2367342
+## subcategoryOphthalmology                                                     -1.1354651
+## subcategoryOptics                                                             0.0106452
+## subcategoryOptimization and Control                                           0.0207221
+## subcategoryOther Computer Science                                            -0.1533028
+## subcategoryOther Condensed Matter                                             0.1711205
+## subcategoryOther Quantitative Biology                                        -0.6030829
+## subcategoryOther Statistics                                                  -0.4762757
+## subcategoryOtolaryngology                                                    -0.4645108
+## subcategoryPaleontology                                                      -0.3033345
+## subcategoryPathology                                                         -1.2170190
+## subcategoryPattern Formation and Solitons                                     0.0849173
+## subcategoryPediatrics                                                        -1.5103521
+## subcategoryPerformance                                                        0.1022853
+## subcategoryPharmacology and Therapeutics                                     -1.0513773
+## subcategoryPharmacology and Toxicology                                       -1.0883474
+## subcategoryPhysics and Society                                               -0.1825646
+## subcategoryPhysics Education                                                 -0.8277015
+## subcategoryPhysiology                                                        -1.1656361
+## subcategoryPlant Biology                                                     -1.0717890
+## subcategoryPlasma Physics                                                     0.2345385
+## subcategoryPopular Physics                                                   -0.3898514
+## subcategoryPopulations and Evolution                                         -0.5051949
+## subcategoryPortfolio Management                                              -0.2549847
+## subcategoryPricing of Securities                                              0.8675079
+## subcategoryPrimary Care Research                                             -1.2140905
+## subcategoryProbability                                                        0.0805416
+## subcategoryProgramming Languages                                              0.2861451
+## subcategoryPsychiatry and Clinical Psychology                                -1.2779424
+## subcategoryPublic and Global Health                                          -1.2304036
+## subcategoryQuantitative Methods                                              -0.5504472
+## subcategoryQuantum Algebra                                                    0.0148351
+## subcategoryQuantum Gases                                                      0.1837496
+## subcategoryQuantum Physics                                                    0.1189275
+## subcategoryRadiology and Imaging                                             -0.9034832
+## subcategoryRehabilitation Medicine and Physical Therapy                      -1.4216956
+## subcategoryRepresentation Theory                                             -0.1322485
+## subcategoryRespiratory Medicine                                              -0.9921918
+## subcategoryRheumatology                                                      -1.4324740
+## subcategoryRings and Algebras                                                -0.2734746
+## subcategoryRisk Management                                                   -0.0731020
+## subcategoryRobotics                                                           0.0965292
+## subcategoryScientific Communication and Education                            -1.4287383
+## subcategorySexual and Reproductive Health                                    -2.0568828
+## subcategorySignal Processing                                                 -0.1446697
+## subcategorySocial and Information Networks                                   -0.3003763
+## subcategorySoft Condensed Matter                                             -0.1392464
+## subcategorySoftware Engineering                                              -0.0220942
+## subcategorySolar and Stellar Astrophysics                                    -0.4648535
+## subcategorySound                                                             -0.1016183
+## subcategorySpace Physics                                                     -0.1777003
+## subcategorySpectral Theory                                                    0.0513052
+## subcategorySports Medicine                                                   -0.4141740
+## subcategoryStatistical Finance                                                0.0216992
+## subcategoryStatistical Mechanics                                              0.2939068
+## subcategoryStatistics Theory                                                 -0.1865298
+## subcategoryStrongly Correlated Electrons                                      0.1195787
+## subcategorySubcellular Processes                                             -0.5010973
+## subcategorySuperconductivity                                                 -0.0063040
+## subcategorySurgery                                                           -0.7451258
+## subcategorySymbolic Computation                                               0.1915519
+## subcategorySymplectic Geometry                                                0.0808280
+## subcategorySynthetic Biology                                                 -0.8409853
+## subcategorySystems and Control                                                0.1048150
+## subcategorySystems Biology                                                   -0.7781593
+## subcategoryTheoretical Economics                                              0.4423263
+## subcategoryTissues and Organs                                                -0.7844892
+## subcategoryTrading and Market Microstructure                                  0.2580317
+## subcategoryZoology                                                           -0.9438615
+##                                                                              Std. Error
+## (Intercept)                                                                   0.0604344
+## month                                                                         0.0002091
+## covidTRUE                                                                     0.0058238
+## covidpaperTRUE                                                                0.0154456
+## subcategoryAdaptation and Self-Organizing Systems                             0.0870684
+## subcategoryAddiction Medicine                                                 0.2456514
+## subcategoryAlgebraic Geometry                                                 0.0669799
+## subcategoryAlgebraic Topology                                                 0.0778336
+## subcategoryAllergy and Immunology                                             0.1350824
+## subcategoryAnalysis of PDEs                                                   0.0638362
+## subcategoryAnimal Behavior and Cognition                                      0.0740245
+## subcategoryApplications                                                       0.0658323
+## subcategoryApplied Physics                                                    0.0637525
+## subcategoryArtificial Intelligence                                            0.0626257
+## subcategoryAstrophysics of Galaxies                                           0.0613736
+## subcategoryAtmospheric and Oceanic Physics                                    0.0850908
+## subcategoryAtomic and Molecular Clusters                                      0.1068218
+## subcategoryAtomic Physics                                                     0.0697183
+## subcategoryAudio and Speech Processing                                        0.0688973
+## subcategoryBiochemistry                                                       0.0640778
+## subcategoryBioengineering                                                     0.0670681
+## subcategoryBioinformatics                                                     0.0625130
+## subcategoryBiological Physics                                                 0.0689973
+## subcategoryBiomolecules                                                       0.0851478
+## subcategoryBiophysics                                                         0.0652723
+## subcategoryCancer Biology                                                     0.0627848
+## subcategoryCardiovascular Medicine                                            0.0968292
+## subcategoryCategory Theory                                                    0.0902727
+## subcategoryCell Behavior                                                      0.0958935
+## subcategoryCell Biology                                                       0.0624824
+## subcategoryChaotic Dynamics                                                   0.0855352
+## subcategoryChemical Physics                                                   0.0666942
+## subcategoryClassical Analysis and ODEs                                        0.0728951
+## subcategoryClassical Physics                                                  0.0906823
+## subcategoryClinical Trials                                                    0.1323829
+## subcategoryCombinatorics                                                      0.0636176
+## subcategoryCommutative Algebra                                                0.0792118
+## subcategoryComplex Variables                                                  0.0799631
+## subcategoryComputation                                                        0.0783684
+## subcategoryComputation and Language                                           0.0622270
+## subcategoryComputational Complexity                                           0.0817536
+## subcategoryComputational Engineering, Finance, and Science                    0.0845282
+## subcategoryComputational Finance                                              0.1622521
+## subcategoryComputational Geometry                                             0.0832640
+## subcategoryComputational Physics                                              0.0673851
+## subcategoryComputer Science and Game Theory                                   0.0752105
+## subcategoryComputer Vision and Pattern Recognition                            0.0611526
+## subcategoryComputers and Society                                              0.0655470
+## subcategoryCosmology and Nongalactic Astrophysics                             0.0625711
+## subcategoryCryptography and Security                                          0.0644002
+## subcategoryData Analysis, Statistics and Probability                          0.0786875
+## subcategoryData Structures and Algorithms                                     0.0676475
+## subcategoryDatabases                                                          0.0719993
+## subcategoryDermatology                                                        0.3410053
+## subcategoryDevelopmental Biology                                              0.0647338
+## subcategoryDifferential Geometry                                              0.0686771
+## subcategoryDigital Libraries                                                  0.0823095
+## subcategoryDiscrete Mathematics                                               0.0753873
+## subcategoryDisordered Systems and Neural Networks                             0.0740946
+## subcategoryDistributed, Parallel, and Cluster Computing                       0.0659370
+## subcategoryDynamical Systems                                                  0.0667071
+## subcategoryEarth and Planetary Astrophysics                                   0.0627023
+## subcategoryEcology                                                            0.0644408
+## subcategoryEconometrics                                                       0.1075249
+## subcategoryEconomics                                                          0.1026112
+## subcategoryEmergency Medicine                                                 0.1681004
+## subcategoryEmerging Technologies                                              0.0851308
+## subcategoryEndocrinology (including Diabetes Mellitus and Metabolic Disease)  0.1393856
+## subcategoryEpidemiology                                                       0.0635643
+## subcategoryEvolutionary Biology                                               0.0640502
+## subcategoryExactly Solvable and Integrable Systems                            0.0953404
+## subcategoryFluid Dynamics                                                     0.0685440
+## subcategoryFormal Languages and Automata Theory                               0.0911591
+## subcategoryFunctional Analysis                                                0.0696244
+## subcategoryGastroenterology                                                   0.1128672
+## subcategoryGeneral Economics                                                  0.1059564
+## subcategoryGeneral Finance                                                    0.1422390
+## subcategoryGeneral Literature                                                 0.4795446
+## subcategoryGeneral Mathematics                                                0.2084903
+## subcategoryGeneral Physics                                                    0.1238798
+## subcategoryGeneral Relativity and Quantum Cosmology                           0.0636612
+## subcategoryGeneral Topology                                                   0.1149309
+## subcategoryGenetic and Genomic Medicine                                       0.0736198
+## subcategoryGenetics                                                           0.0621692
+## subcategoryGenomics                                                           0.0618435
+## subcategoryGeometric Topology                                                 0.0727595
+## subcategoryGeophysics                                                         0.0806879
+## subcategoryGeriatric Medicine                                                 0.3749931
+## subcategoryGraphics                                                           0.0829769
+## subcategoryGroup Theory                                                       0.0740042
+## subcategoryHardware Architecture                                              0.0931445
+## subcategoryHealth Economics                                                   0.2454498
+## subcategoryHealth Informatics                                                 0.0921585
+## subcategoryHealth Policy                                                      0.1376315
+## subcategoryHealth Systems and Quality Improvement                             0.1288631
+## subcategoryHematology                                                         0.2656213
+## subcategoryHigh Energy Astrophysical Phenomena                                0.0625525
+## subcategoryHigh Energy Physics - Experiment                                   0.0650496
+## subcategoryHigh Energy Physics - Lattice                                      0.0783358
+## subcategoryHigh Energy Physics - Phenomenology                                0.0624768
+## subcategoryHigh Energy Physics - Theory                                       0.0636110
+## subcategoryHistory and Overview                                               0.1321490
+## subcategoryHistory and Philosophy of Physics                                  0.1392871
+## subcategoryHIV/AIDS                                                           0.1598703
+## subcategoryHuman-Computer Interaction                                         0.0663657
+## subcategoryImage and Video Processing                                         0.0642542
+## subcategoryImmunology                                                         0.0632010
+## subcategoryInfectious Diseases (except HIV/AIDS)                              0.0641364
+## subcategoryInformation Retrieval                                              0.0668131
+## subcategoryInformation Theory                                                 0.0618090
+## subcategoryInstrumentation and Detectors                                      0.0658822
+## subcategoryInstrumentation and Methods for Astrophysics                       0.0631789
+## subcategoryIntensive Care and Critical Care Medicine                          0.0983026
+## subcategoryK-Theory and Homology                                              0.1183890
+## subcategoryLogic                                                              0.0859353
+## subcategoryLogic in Computer Science                                          0.0742622
+## subcategoryMachine Learning                                                   0.0607625
+## subcategoryMaterials Science                                                  0.0617238
+## subcategoryMathematical Finance                                               0.1100139
+## subcategoryMathematical Physics                                               0.0633133
+## subcategoryMathematical Software                                              0.1288980
+## subcategoryMedical Education                                                  0.3520911
+## subcategoryMedical Physics                                                    0.0721399
+## subcategoryMesoscale and Nanoscale Physics                                    0.0621886
+## subcategoryMethodology                                                        0.0655571
+## subcategoryMetric Geometry                                                    0.0856199
+## subcategoryMicrobiology                                                       0.0616344
+## subcategoryMolecular Biology                                                  0.0638727
+## subcategoryMolecular Networks                                                 0.0916508
+## subcategoryMultiagent Systems                                                 0.0779018
+## subcategoryMultimedia                                                         0.0774268
+## subcategoryNephrology                                                         0.1957095
+## subcategoryNetworking and Internet Architecture                               0.0656203
+## subcategoryNeural and Evolutionary Computing                                  0.0683376
+## subcategoryNeurology                                                          0.0798162
+## subcategoryNeurons and Cognition                                              0.0697942
+## subcategoryNeuroscience                                                       0.0611459
+## subcategoryNuclear Experiment                                                 0.0704312
+## subcategoryNuclear Theory                                                     0.0671400
+## subcategoryNumber Theory                                                      0.0682814
+## subcategoryNumerical Analysis                                                 0.0635493
+## subcategoryNutrition                                                          0.2289396
+## subcategoryObstetrics and Gynecology                                          0.2477171
+## subcategoryOccupational and Environmental Health                              0.1208671
+## subcategoryOncology                                                           0.0795482
+## subcategoryOperating Systems                                                  0.2341257
+## subcategoryOperator Algebras                                                  0.0886018
+## subcategoryOphthalmology                                                      0.1711520
+## subcategoryOptics                                                             0.0630789
+## subcategoryOptimization and Control                                           0.0638022
+## subcategoryOther Computer Science                                             0.1413685
+## subcategoryOther Condensed Matter                                             0.0878662
+## subcategoryOther Quantitative Biology                                         0.2294327
+## subcategoryOther Statistics                                                   0.1488055
+## subcategoryOtolaryngology                                                     0.2745816
+## subcategoryPaleontology                                                       0.4011302
+## subcategoryPathology                                                          0.0748266
+## subcategoryPattern Formation and Solitons                                     0.0888257
+## subcategoryPediatrics                                                         0.1422693
+## subcategoryPerformance                                                        0.0869472
+## subcategoryPharmacology and Therapeutics                                      0.1392692
+## subcategoryPharmacology and Toxicology                                        0.0721092
+## subcategoryPhysics and Society                                                0.0675365
+## subcategoryPhysics Education                                                  0.0941145
+## subcategoryPhysiology                                                         0.0696458
+## subcategoryPlant Biology                                                      0.0646476
+## subcategoryPlasma Physics                                                     0.0759909
+## subcategoryPopular Physics                                                    0.1883804
+## subcategoryPopulations and Evolution                                          0.0698357
+## subcategoryPortfolio Management                                               0.2505047
+## subcategoryPricing of Securities                                              0.3335472
+## subcategoryPrimary Care Research                                              0.2438405
+## subcategoryProbability                                                        0.0651610
+## subcategoryProgramming Languages                                              0.0818814
+## subcategoryPsychiatry and Clinical Psychology                                 0.0801574
+## subcategoryPublic and Global Health                                           0.0721422
+## subcategoryQuantitative Methods                                               0.0682245
+## subcategoryQuantum Algebra                                                    0.0815742
+## subcategoryQuantum Gases                                                      0.0700780
+## subcategoryQuantum Physics                                                    0.0619975
+## subcategoryRadiology and Imaging                                              0.0963135
+## subcategoryRehabilitation Medicine and Physical Therapy                       0.1383516
+## subcategoryRepresentation Theory                                              0.0705176
+## subcategoryRespiratory Medicine                                               0.0985049
+## subcategoryRheumatology                                                       0.1346048
+## subcategoryRings and Algebras                                                 0.0743044
+## subcategoryRisk Management                                                    0.1647412
+## subcategoryRobotics                                                           0.0647527
+## subcategoryScientific Communication and Education                             0.0861757
+## subcategorySexual and Reproductive Health                                     0.2180886
+## subcategorySignal Processing                                                  0.0638010
+## subcategorySocial and Information Networks                                    0.0654391
+## subcategorySoft Condensed Matter                                              0.0656951
+## subcategorySoftware Engineering                                               0.0694222
+## subcategorySolar and Stellar Astrophysics                                     0.0621695
+## subcategorySound                                                              0.0692876
+## subcategorySpace Physics                                                      0.0804890
+## subcategorySpectral Theory                                                    0.0868332
+## subcategorySports Medicine                                                    0.3307919
+## subcategoryStatistical Finance                                                0.1323980
+## subcategoryStatistical Mechanics                                              0.0664578
+## subcategoryStatistics Theory                                                  0.0648323
+## subcategoryStrongly Correlated Electrons                                      0.0639437
+## subcategorySubcellular Processes                                              0.1360181
+## subcategorySuperconductivity                                                  0.0666659
+## subcategorySurgery                                                            0.2439676
+## subcategorySymbolic Computation                                               0.1322269
+## subcategorySymplectic Geometry                                                0.0933172
+## subcategorySynthetic Biology                                                  0.0776925
+## subcategorySystems and Control                                                0.0638189
+## subcategorySystems Biology                                                    0.0664443
+## subcategoryTheoretical Economics                                              0.1673625
+## subcategoryTissues and Organs                                                 0.0889119
+## subcategoryTrading and Market Microstructure                                  0.2412556
+## subcategoryZoology                                                            0.1003155
+##                                                                              z value
+## (Intercept)                                                                   29.629
+## month                                                                        -17.162
+## covidTRUE                                                                      3.460
+## covidpaperTRUE                                                                 4.446
+## subcategoryAdaptation and Self-Organizing Systems                             -1.391
+## subcategoryAddiction Medicine                                                 -4.985
+## subcategoryAlgebraic Geometry                                                  1.284
+## subcategoryAlgebraic Topology                                                  0.952
+## subcategoryAllergy and Immunology                                             -9.855
+## subcategoryAnalysis of PDEs                                                   -2.086
+## subcategoryAnimal Behavior and Cognition                                     -13.909
+## subcategoryApplications                                                       -7.428
+## subcategoryApplied Physics                                                    -2.305
+## subcategoryArtificial Intelligence                                            -1.756
+## subcategoryAstrophysics of Galaxies                                           -9.471
+## subcategoryAtmospheric and Oceanic Physics                                    -0.705
+## subcategoryAtomic and Molecular Clusters                                      -1.556
+## subcategoryAtomic Physics                                                      2.068
+## subcategoryAudio and Speech Processing                                        -1.974
+## subcategoryBiochemistry                                                      -14.803
+## subcategoryBioengineering                                                    -11.856
+## subcategoryBioinformatics                                                    -11.105
+## subcategoryBiological Physics                                                 -4.452
+## subcategoryBiomolecules                                                       -5.867
+## subcategoryBiophysics                                                         -9.978
+## subcategoryCancer Biology                                                    -18.457
+## subcategoryCardiovascular Medicine                                            -6.728
+## subcategoryCategory Theory                                                     0.494
+## subcategoryCell Behavior                                                      -6.617
+## subcategoryCell Biology                                                      -18.855
+## subcategoryChaotic Dynamics                                                    1.432
+## subcategoryChemical Physics                                                   -1.214
+## subcategoryClassical Analysis and ODEs                                        -0.445
+## subcategoryClassical Physics                                                   1.877
+## subcategoryClinical Trials                                                    -9.885
+## subcategoryCombinatorics                                                      -3.060
+## subcategoryCommutative Algebra                                                -2.373
+## subcategoryComplex Variables                                                   2.490
+## subcategoryComputation                                                         1.178
+## subcategoryComputation and Language                                           -6.087
+## subcategoryComputational Complexity                                            3.911
+## subcategoryComputational Engineering, Finance, and Science                     1.101
+## subcategoryComputational Finance                                               2.007
+## subcategoryComputational Geometry                                              1.944
+## subcategoryComputational Physics                                               3.054
+## subcategoryComputer Science and Game Theory                                   -0.009
+## subcategoryComputer Vision and Pattern Recognition                            -3.210
+## subcategoryComputers and Society                                              -7.904
+## subcategoryCosmology and Nongalactic Astrophysics                             -3.732
+## subcategoryCryptography and Security                                          -0.993
+## subcategoryData Analysis, Statistics and Probability                           0.517
+## subcategoryData Structures and Algorithms                                      2.479
+## subcategoryDatabases                                                          -2.539
+## subcategoryDermatology                                                        -2.639
+## subcategoryDevelopmental Biology                                             -19.648
+## subcategoryDifferential Geometry                                               1.719
+## subcategoryDigital Libraries                                                  -5.578
+## subcategoryDiscrete Mathematics                                                1.484
+## subcategoryDisordered Systems and Neural Networks                              1.098
+## subcategoryDistributed, Parallel, and Cluster Computing                        0.800
+## subcategoryDynamical Systems                                                  -1.389
+## subcategoryEarth and Planetary Astrophysics                                   -6.627
+## subcategoryEcology                                                           -14.618
+## subcategoryEconometrics                                                        2.480
+## subcategoryEconomics                                                          -0.394
+## subcategoryEmergency Medicine                                                 -4.695
+## subcategoryEmerging Technologies                                               1.817
+## subcategoryEndocrinology (including Diabetes Mellitus and Metabolic Disease)  -8.781
+## subcategoryEpidemiology                                                      -16.599
+## subcategoryEvolutionary Biology                                              -14.235
+## subcategoryExactly Solvable and Integrable Systems                            -0.233
+## subcategoryFluid Dynamics                                                      3.354
+## subcategoryFormal Languages and Automata Theory                                0.196
+## subcategoryFunctional Analysis                                                 0.696
+## subcategoryGastroenterology                                                   -9.633
+## subcategoryGeneral Economics                                                  -0.997
+## subcategoryGeneral Finance                                                     0.886
+## subcategoryGeneral Literature                                                  0.753
+## subcategoryGeneral Mathematics                                                 0.891
+## subcategoryGeneral Physics                                                     2.252
+## subcategoryGeneral Relativity and Quantum Cosmology                            1.385
+## subcategoryGeneral Topology                                                    1.174
+## subcategoryGenetic and Genomic Medicine                                      -18.441
+## subcategoryGenetics                                                          -18.664
+## subcategoryGenomics                                                          -16.314
+## subcategoryGeometric Topology                                                 -2.470
+## subcategoryGeophysics                                                         -0.833
+## subcategoryGeriatric Medicine                                                 -0.719
+## subcategoryGraphics                                                            0.609
+## subcategoryGroup Theory                                                        0.536
+## subcategoryHardware Architecture                                               2.031
+## subcategoryHealth Economics                                                   -1.162
+## subcategoryHealth Informatics                                                 -8.754
+## subcategoryHealth Policy                                                      -7.267
+## subcategoryHealth Systems and Quality Improvement                             -8.751
+## subcategoryHematology                                                         -3.010
+## subcategoryHigh Energy Astrophysical Phenomena                                -4.608
+## subcategoryHigh Energy Physics - Experiment                                   -2.663
+## subcategoryHigh Energy Physics - Lattice                                       6.212
+## subcategoryHigh Energy Physics - Phenomenology                                -0.046
+## subcategoryHigh Energy Physics - Theory                                        5.208
+## subcategoryHistory and Overview                                               -1.282
+## subcategoryHistory and Philosophy of Physics                                   2.190
+## subcategoryHIV/AIDS                                                           -8.093
+## subcategoryHuman-Computer Interaction                                         -7.791
+## subcategoryImage and Video Processing                                         -3.609
+## subcategoryImmunology                                                        -19.856
+## subcategoryInfectious Diseases (except HIV/AIDS)                             -18.006
+## subcategoryInformation Retrieval                                              -5.186
+## subcategoryInformation Theory                                                 -2.464
+## subcategoryInstrumentation and Detectors                                      -0.546
+## subcategoryInstrumentation and Methods for Astrophysics                       -4.132
+## subcategoryIntensive Care and Critical Care Medicine                          -7.329
+## subcategoryK-Theory and Homology                                               3.428
+## subcategoryLogic                                                               5.018
+## subcategoryLogic in Computer Science                                           2.937
+## subcategoryMachine Learning                                                   -0.502
+## subcategoryMaterials Science                                                  -2.927
+## subcategoryMathematical Finance                                               -0.969
+## subcategoryMathematical Physics                                                5.565
+## subcategoryMathematical Software                                               2.633
+## subcategoryMedical Education                                                  -1.530
+## subcategoryMedical Physics                                                    -5.098
+## subcategoryMesoscale and Nanoscale Physics                                     0.682
+## subcategoryMethodology                                                        -5.213
+## subcategoryMetric Geometry                                                     2.480
+## subcategoryMicrobiology                                                      -19.274
+## subcategoryMolecular Biology                                                 -17.488
+## subcategoryMolecular Networks                                                 -5.299
+## subcategoryMultiagent Systems                                                  0.192
+## subcategoryMultimedia                                                         -3.802
+## subcategoryNephrology                                                         -6.414
+## subcategoryNetworking and Internet Architecture                               -0.785
+## subcategoryNeural and Evolutionary Computing                                   1.440
+## subcategoryNeurology                                                         -12.910
+## subcategoryNeurons and Cognition                                              -6.506
+## subcategoryNeuroscience                                                      -16.477
+## subcategoryNuclear Experiment                                                 -2.676
+## subcategoryNuclear Theory                                                     -0.532
+## subcategoryNumber Theory                                                       2.201
+## subcategoryNumerical Analysis                                                 -0.495
+## subcategoryNutrition                                                          -6.339
+## subcategoryObstetrics and Gynecology                                          -7.973
+## subcategoryOccupational and Environmental Health                              -9.759
+## subcategoryOncology                                                          -14.754
+## subcategoryOperating Systems                                                   1.596
+## subcategoryOperator Algebras                                                   2.672
+## subcategoryOphthalmology                                                      -6.634
+## subcategoryOptics                                                              0.169
+## subcategoryOptimization and Control                                            0.325
+## subcategoryOther Computer Science                                             -1.084
+## subcategoryOther Condensed Matter                                              1.948
+## subcategoryOther Quantitative Biology                                         -2.629
+## subcategoryOther Statistics                                                   -3.201
+## subcategoryOtolaryngology                                                     -1.692
+## subcategoryPaleontology                                                       -0.756
+## subcategoryPathology                                                         -16.265
+## subcategoryPattern Formation and Solitons                                      0.956
+## subcategoryPediatrics                                                        -10.616
+## subcategoryPerformance                                                         1.176
+## subcategoryPharmacology and Therapeutics                                      -7.549
+## subcategoryPharmacology and Toxicology                                       -15.093
+## subcategoryPhysics and Society                                                -2.703
+## subcategoryPhysics Education                                                  -8.795
+## subcategoryPhysiology                                                        -16.737
+## subcategoryPlant Biology                                                     -16.579
+## subcategoryPlasma Physics                                                      3.086
+## subcategoryPopular Physics                                                    -2.069
+## subcategoryPopulations and Evolution                                          -7.234
+## subcategoryPortfolio Management                                               -1.018
+## subcategoryPricing of Securities                                               2.601
+## subcategoryPrimary Care Research                                              -4.979
+## subcategoryProbability                                                         1.236
+## subcategoryProgramming Languages                                               3.495
+## subcategoryPsychiatry and Clinical Psychology                                -15.943
+## subcategoryPublic and Global Health                                          -17.055
+## subcategoryQuantitative Methods                                               -8.068
+## subcategoryQuantum Algebra                                                     0.182
+## subcategoryQuantum Gases                                                       2.622
+## subcategoryQuantum Physics                                                     1.918
+## subcategoryRadiology and Imaging                                              -9.381
+## subcategoryRehabilitation Medicine and Physical Therapy                      -10.276
+## subcategoryRepresentation Theory                                              -1.875
+## subcategoryRespiratory Medicine                                              -10.073
+## subcategoryRheumatology                                                      -10.642
+## subcategoryRings and Algebras                                                 -3.680
+## subcategoryRisk Management                                                    -0.444
+## subcategoryRobotics                                                            1.491
+## subcategoryScientific Communication and Education                            -16.579
+## subcategorySexual and Reproductive Health                                     -9.431
+## subcategorySignal Processing                                                  -2.268
+## subcategorySocial and Information Networks                                    -4.590
+## subcategorySoft Condensed Matter                                              -2.120
+## subcategorySoftware Engineering                                               -0.318
+## subcategorySolar and Stellar Astrophysics                                     -7.477
+## subcategorySound                                                              -1.467
+## subcategorySpace Physics                                                      -2.208
+## subcategorySpectral Theory                                                     0.591
+## subcategorySports Medicine                                                    -1.252
+## subcategoryStatistical Finance                                                 0.164
+## subcategoryStatistical Mechanics                                               4.422
+## subcategoryStatistics Theory                                                  -2.877
+## subcategoryStrongly Correlated Electrons                                       1.870
+## subcategorySubcellular Processes                                              -3.684
+## subcategorySuperconductivity                                                  -0.095
+## subcategorySurgery                                                            -3.054
+## subcategorySymbolic Computation                                                1.449
+## subcategorySymplectic Geometry                                                 0.866
+## subcategorySynthetic Biology                                                 -10.825
+## subcategorySystems and Control                                                 1.642
+## subcategorySystems Biology                                                   -11.711
+## subcategoryTheoretical Economics                                               2.643
+## subcategoryTissues and Organs                                                 -8.823
+## subcategoryTrading and Market Microstructure                                   1.070
+## subcategoryZoology                                                            -9.409
+##                                                                              Pr(>|z|)
+## (Intercept)                                                                   < 2e-16
+## month                                                                         < 2e-16
+## covidTRUE                                                                    0.000541
+## covidpaperTRUE                                                               8.77e-06
+## subcategoryAdaptation and Self-Organizing Systems                            0.164319
+## subcategoryAddiction Medicine                                                6.20e-07
+## subcategoryAlgebraic Geometry                                                0.199014
+## subcategoryAlgebraic Topology                                                0.341027
+## subcategoryAllergy and Immunology                                             < 2e-16
+## subcategoryAnalysis of PDEs                                                  0.036941
+## subcategoryAnimal Behavior and Cognition                                      < 2e-16
+## subcategoryApplications                                                      1.10e-13
+## subcategoryApplied Physics                                                   0.021148
+## subcategoryArtificial Intelligence                                           0.079050
+## subcategoryAstrophysics of Galaxies                                           < 2e-16
+## subcategoryAtmospheric and Oceanic Physics                                   0.480606
+## subcategoryAtomic and Molecular Clusters                                     0.119709
+## subcategoryAtomic Physics                                                    0.038638
+## subcategoryAudio and Speech Processing                                       0.048382
+## subcategoryBiochemistry                                                       < 2e-16
+## subcategoryBioengineering                                                     < 2e-16
+## subcategoryBioinformatics                                                     < 2e-16
+## subcategoryBiological Physics                                                8.50e-06
+## subcategoryBiomolecules                                                      4.44e-09
+## subcategoryBiophysics                                                         < 2e-16
+## subcategoryCancer Biology                                                     < 2e-16
+## subcategoryCardiovascular Medicine                                           1.73e-11
+## subcategoryCategory Theory                                                   0.621181
+## subcategoryCell Behavior                                                     3.65e-11
+## subcategoryCell Biology                                                       < 2e-16
+## subcategoryChaotic Dynamics                                                  0.152159
+## subcategoryChemical Physics                                                  0.224746
+## subcategoryClassical Analysis and ODEs                                       0.656015
+## subcategoryClassical Physics                                                 0.060490
+## subcategoryClinical Trials                                                    < 2e-16
+## subcategoryCombinatorics                                                     0.002214
+## subcategoryCommutative Algebra                                               0.017658
+## subcategoryComplex Variables                                                 0.012791
+## subcategoryComputation                                                       0.238883
+## subcategoryComputation and Language                                          1.15e-09
+## subcategoryComputational Complexity                                          9.20e-05
+## subcategoryComputational Engineering, Finance, and Science                   0.271105
+## subcategoryComputational Finance                                             0.044717
+## subcategoryComputational Geometry                                            0.051898
+## subcategoryComputational Physics                                             0.002260
+## subcategoryComputer Science and Game Theory                                  0.993074
+## subcategoryComputer Vision and Pattern Recognition                           0.001330
+## subcategoryComputers and Society                                             2.71e-15
+## subcategoryCosmology and Nongalactic Astrophysics                            0.000190
+## subcategoryCryptography and Security                                         0.320585
+## subcategoryData Analysis, Statistics and Probability                         0.604885
+## subcategoryData Structures and Algorithms                                    0.013191
+## subcategoryDatabases                                                         0.011113
+## subcategoryDermatology                                                       0.008308
+## subcategoryDevelopmental Biology                                              < 2e-16
+## subcategoryDifferential Geometry                                             0.085578
+## subcategoryDigital Libraries                                                 2.44e-08
+## subcategoryDiscrete Mathematics                                              0.137827
+## subcategoryDisordered Systems and Neural Networks                            0.272090
+## subcategoryDistributed, Parallel, and Cluster Computing                      0.423609
+## subcategoryDynamical Systems                                                 0.164983
+## subcategoryEarth and Planetary Astrophysics                                  3.43e-11
+## subcategoryEcology                                                            < 2e-16
+## subcategoryEconometrics                                                      0.013129
+## subcategoryEconomics                                                         0.693552
+## subcategoryEmergency Medicine                                                2.67e-06
+## subcategoryEmerging Technologies                                             0.069207
+## subcategoryEndocrinology (including Diabetes Mellitus and Metabolic Disease)  < 2e-16
+## subcategoryEpidemiology                                                       < 2e-16
+## subcategoryEvolutionary Biology                                               < 2e-16
+## subcategoryExactly Solvable and Integrable Systems                           0.815485
+## subcategoryFluid Dynamics                                                    0.000797
+## subcategoryFormal Languages and Automata Theory                              0.844457
+## subcategoryFunctional Analysis                                               0.486503
+## subcategoryGastroenterology                                                   < 2e-16
+## subcategoryGeneral Economics                                                 0.318686
+## subcategoryGeneral Finance                                                   0.375621
+## subcategoryGeneral Literature                                                0.451718
+## subcategoryGeneral Mathematics                                               0.372864
+## subcategoryGeneral Physics                                                   0.024320
+## subcategoryGeneral Relativity and Quantum Cosmology                          0.165994
+## subcategoryGeneral Topology                                                  0.240451
+## subcategoryGenetic and Genomic Medicine                                       < 2e-16
+## subcategoryGenetics                                                           < 2e-16
+## subcategoryGenomics                                                           < 2e-16
+## subcategoryGeometric Topology                                                0.013499
+## subcategoryGeophysics                                                        0.404865
+## subcategoryGeriatric Medicine                                                0.472186
+## subcategoryGraphics                                                          0.542557
+## subcategoryGroup Theory                                                      0.592039
+## subcategoryHardware Architecture                                             0.042249
+## subcategoryHealth Economics                                                  0.245065
+## subcategoryHealth Informatics                                                 < 2e-16
+## subcategoryHealth Policy                                                     3.66e-13
+## subcategoryHealth Systems and Quality Improvement                             < 2e-16
+## subcategoryHematology                                                        0.002615
+## subcategoryHigh Energy Astrophysical Phenomena                               4.06e-06
+## subcategoryHigh Energy Physics - Experiment                                  0.007734
+## subcategoryHigh Energy Physics - Lattice                                     5.22e-10
+## subcategoryHigh Energy Physics - Phenomenology                               0.963362
+## subcategoryHigh Energy Physics - Theory                                      1.91e-07
+## subcategoryHistory and Overview                                              0.199827
+## subcategoryHistory and Philosophy of Physics                                 0.028531
+## subcategoryHIV/AIDS                                                          5.82e-16
+## subcategoryHuman-Computer Interaction                                        6.63e-15
+## subcategoryImage and Video Processing                                        0.000307
+## subcategoryImmunology                                                         < 2e-16
+## subcategoryInfectious Diseases (except HIV/AIDS)                              < 2e-16
+## subcategoryInformation Retrieval                                             2.14e-07
+## subcategoryInformation Theory                                                0.013735
+## subcategoryInstrumentation and Detectors                                     0.585063
+## subcategoryInstrumentation and Methods for Astrophysics                      3.60e-05
+## subcategoryIntensive Care and Critical Care Medicine                         2.32e-13
+## subcategoryK-Theory and Homology                                             0.000607
+## subcategoryLogic                                                             5.21e-07
+## subcategoryLogic in Computer Science                                         0.003316
+## subcategoryMachine Learning                                                  0.615570
+## subcategoryMaterials Science                                                 0.003421
+## subcategoryMathematical Finance                                              0.332773
+## subcategoryMathematical Physics                                              2.62e-08
+## subcategoryMathematical Software                                             0.008460
+## subcategoryMedical Education                                                 0.125946
+## subcategoryMedical Physics                                                   3.43e-07
+## subcategoryMesoscale and Nanoscale Physics                                   0.495469
+## subcategoryMethodology                                                       1.85e-07
+## subcategoryMetric Geometry                                                   0.013130
+## subcategoryMicrobiology                                                       < 2e-16
+## subcategoryMolecular Biology                                                  < 2e-16
+## subcategoryMolecular Networks                                                1.16e-07
+## subcategoryMultiagent Systems                                                0.847608
+## subcategoryMultimedia                                                        0.000143
+## subcategoryNephrology                                                        1.41e-10
+## subcategoryNetworking and Internet Architecture                              0.432400
+## subcategoryNeural and Evolutionary Computing                                 0.149886
+## subcategoryNeurology                                                          < 2e-16
+## subcategoryNeurons and Cognition                                             7.71e-11
+## subcategoryNeuroscience                                                       < 2e-16
+## subcategoryNuclear Experiment                                                0.007441
+## subcategoryNuclear Theory                                                    0.594553
+## subcategoryNumber Theory                                                     0.027731
+## subcategoryNumerical Analysis                                                0.620556
+## subcategoryNutrition                                                         2.31e-10
+## subcategoryObstetrics and Gynecology                                         1.54e-15
+## subcategoryOccupational and Environmental Health                              < 2e-16
+## subcategoryOncology                                                           < 2e-16
+## subcategoryOperating Systems                                                 0.110479
+## subcategoryOperator Algebras                                                 0.007543
+## subcategoryOphthalmology                                                     3.26e-11
+## subcategoryOptics                                                            0.865985
+## subcategoryOptimization and Control                                          0.745342
+## subcategoryOther Computer Science                                            0.278179
+## subcategoryOther Condensed Matter                                            0.051473
+## subcategoryOther Quantitative Biology                                        0.008574
+## subcategoryOther Statistics                                                  0.001371
+## subcategoryOtolaryngology                                                    0.090702
+## subcategoryPaleontology                                                      0.449530
+## subcategoryPathology                                                          < 2e-16
+## subcategoryPattern Formation and Solitons                                    0.339072
+## subcategoryPediatrics                                                         < 2e-16
+## subcategoryPerformance                                                       0.239433
+## subcategoryPharmacology and Therapeutics                                     4.38e-14
+## subcategoryPharmacology and Toxicology                                        < 2e-16
+## subcategoryPhysics and Society                                               0.006868
+## subcategoryPhysics Education                                                  < 2e-16
+## subcategoryPhysiology                                                         < 2e-16
+## subcategoryPlant Biology                                                      < 2e-16
+## subcategoryPlasma Physics                                                    0.002026
+## subcategoryPopular Physics                                                   0.038500
+## subcategoryPopulations and Evolution                                         4.69e-13
+## subcategoryPortfolio Management                                              0.308733
+## subcategoryPricing of Securities                                             0.009299
+## subcategoryPrimary Care Research                                             6.39e-07
+## subcategoryProbability                                                       0.216444
+## subcategoryProgramming Languages                                             0.000475
+## subcategoryPsychiatry and Clinical Psychology                                 < 2e-16
+## subcategoryPublic and Global Health                                           < 2e-16
+## subcategoryQuantitative Methods                                              7.14e-16
+## subcategoryQuantum Algebra                                                   0.855693
+## subcategoryQuantum Gases                                                     0.008740
+## subcategoryQuantum Physics                                                   0.055078
+## subcategoryRadiology and Imaging                                              < 2e-16
+## subcategoryRehabilitation Medicine and Physical Therapy                       < 2e-16
+## subcategoryRepresentation Theory                                             0.060738
+## subcategoryRespiratory Medicine                                               < 2e-16
+## subcategoryRheumatology                                                       < 2e-16
+## subcategoryRings and Algebras                                                0.000233
+## subcategoryRisk Management                                                   0.657232
+## subcategoryRobotics                                                          0.136030
+## subcategoryScientific Communication and Education                             < 2e-16
+## subcategorySexual and Reproductive Health                                     < 2e-16
+## subcategorySignal Processing                                                 0.023359
+## subcategorySocial and Information Networks                                   4.43e-06
+## subcategorySoft Condensed Matter                                             0.034041
+## subcategorySoftware Engineering                                              0.750289
+## subcategorySolar and Stellar Astrophysics                                    7.59e-14
+## subcategorySound                                                             0.142480
+## subcategorySpace Physics                                                     0.027261
+## subcategorySpectral Theory                                                   0.554622
+## subcategorySports Medicine                                                   0.210545
+## subcategoryStatistical Finance                                               0.869815
+## subcategoryStatistical Mechanics                                             9.76e-06
+## subcategoryStatistics Theory                                                 0.004013
+## subcategoryStrongly Correlated Electrons                                     0.061475
+## subcategorySubcellular Processes                                             0.000230
+## subcategorySuperconductivity                                                 0.924663
+## subcategorySurgery                                                           0.002257
+## subcategorySymbolic Computation                                              0.147432
+## subcategorySymplectic Geometry                                               0.386400
+## subcategorySynthetic Biology                                                  < 2e-16
+## subcategorySystems and Control                                               0.100511
+## subcategorySystems Biology                                                    < 2e-16
+## subcategoryTheoretical Economics                                             0.008219
+## subcategoryTissues and Organs                                                 < 2e-16
+## subcategoryTrading and Market Microstructure                                 0.284828
+## subcategoryZoology                                                            < 2e-16
+##                                                                                 
+## (Intercept)                                                                  ***
+## month                                                                        ***
+## covidTRUE                                                                    ***
+## covidpaperTRUE                                                               ***
+## subcategoryAdaptation and Self-Organizing Systems                               
+## subcategoryAddiction Medicine                                                ***
+## subcategoryAlgebraic Geometry                                                   
+## subcategoryAlgebraic Topology                                                   
+## subcategoryAllergy and Immunology                                            ***
+## subcategoryAnalysis of PDEs                                                  *  
+## subcategoryAnimal Behavior and Cognition                                     ***
+## subcategoryApplications                                                      ***
+## subcategoryApplied Physics                                                   *  
+## subcategoryArtificial Intelligence                                           .  
+## subcategoryAstrophysics of Galaxies                                          ***
+## subcategoryAtmospheric and Oceanic Physics                                      
+## subcategoryAtomic and Molecular Clusters                                        
+## subcategoryAtomic Physics                                                    *  
+## subcategoryAudio and Speech Processing                                       *  
+## subcategoryBiochemistry                                                      ***
+## subcategoryBioengineering                                                    ***
+## subcategoryBioinformatics                                                    ***
+## subcategoryBiological Physics                                                ***
+## subcategoryBiomolecules                                                      ***
+## subcategoryBiophysics                                                        ***
+## subcategoryCancer Biology                                                    ***
+## subcategoryCardiovascular Medicine                                           ***
+## subcategoryCategory Theory                                                      
+## subcategoryCell Behavior                                                     ***
+## subcategoryCell Biology                                                      ***
+## subcategoryChaotic Dynamics                                                     
+## subcategoryChemical Physics                                                     
+## subcategoryClassical Analysis and ODEs                                          
+## subcategoryClassical Physics                                                 .  
+## subcategoryClinical Trials                                                   ***
+## subcategoryCombinatorics                                                     ** 
+## subcategoryCommutative Algebra                                               *  
+## subcategoryComplex Variables                                                 *  
+## subcategoryComputation                                                          
+## subcategoryComputation and Language                                          ***
+## subcategoryComputational Complexity                                          ***
+## subcategoryComputational Engineering, Finance, and Science                      
+## subcategoryComputational Finance                                             *  
+## subcategoryComputational Geometry                                            .  
+## subcategoryComputational Physics                                             ** 
+## subcategoryComputer Science and Game Theory                                     
+## subcategoryComputer Vision and Pattern Recognition                           ** 
+## subcategoryComputers and Society                                             ***
+## subcategoryCosmology and Nongalactic Astrophysics                            ***
+## subcategoryCryptography and Security                                            
+## subcategoryData Analysis, Statistics and Probability                            
+## subcategoryData Structures and Algorithms                                    *  
+## subcategoryDatabases                                                         *  
+## subcategoryDermatology                                                       ** 
+## subcategoryDevelopmental Biology                                             ***
+## subcategoryDifferential Geometry                                             .  
+## subcategoryDigital Libraries                                                 ***
+## subcategoryDiscrete Mathematics                                                 
+## subcategoryDisordered Systems and Neural Networks                               
+## subcategoryDistributed, Parallel, and Cluster Computing                         
+## subcategoryDynamical Systems                                                    
+## subcategoryEarth and Planetary Astrophysics                                  ***
+## subcategoryEcology                                                           ***
+## subcategoryEconometrics                                                      *  
+## subcategoryEconomics                                                            
+## subcategoryEmergency Medicine                                                ***
+## subcategoryEmerging Technologies                                             .  
+## subcategoryEndocrinology (including Diabetes Mellitus and Metabolic Disease) ***
+## subcategoryEpidemiology                                                      ***
+## subcategoryEvolutionary Biology                                              ***
+## subcategoryExactly Solvable and Integrable Systems                              
+## subcategoryFluid Dynamics                                                    ***
+## subcategoryFormal Languages and Automata Theory                                 
+## subcategoryFunctional Analysis                                                  
+## subcategoryGastroenterology                                                  ***
+## subcategoryGeneral Economics                                                    
+## subcategoryGeneral Finance                                                      
+## subcategoryGeneral Literature                                                   
+## subcategoryGeneral Mathematics                                                  
+## subcategoryGeneral Physics                                                   *  
+## subcategoryGeneral Relativity and Quantum Cosmology                             
+## subcategoryGeneral Topology                                                     
+## subcategoryGenetic and Genomic Medicine                                      ***
+## subcategoryGenetics                                                          ***
+## subcategoryGenomics                                                          ***
+## subcategoryGeometric Topology                                                *  
+## subcategoryGeophysics                                                           
+## subcategoryGeriatric Medicine                                                   
+## subcategoryGraphics                                                             
+## subcategoryGroup Theory                                                         
+## subcategoryHardware Architecture                                             *  
+## subcategoryHealth Economics                                                     
+## subcategoryHealth Informatics                                                ***
+## subcategoryHealth Policy                                                     ***
+## subcategoryHealth Systems and Quality Improvement                            ***
+## subcategoryHematology                                                        ** 
+## subcategoryHigh Energy Astrophysical Phenomena                               ***
+## subcategoryHigh Energy Physics - Experiment                                  ** 
+## subcategoryHigh Energy Physics - Lattice                                     ***
+## subcategoryHigh Energy Physics - Phenomenology                                  
+## subcategoryHigh Energy Physics - Theory                                      ***
+## subcategoryHistory and Overview                                                 
+## subcategoryHistory and Philosophy of Physics                                 *  
+## subcategoryHIV/AIDS                                                          ***
+## subcategoryHuman-Computer Interaction                                        ***
+## subcategoryImage and Video Processing                                        ***
+## subcategoryImmunology                                                        ***
+## subcategoryInfectious Diseases (except HIV/AIDS)                             ***
+## subcategoryInformation Retrieval                                             ***
+## subcategoryInformation Theory                                                *  
+## subcategoryInstrumentation and Detectors                                        
+## subcategoryInstrumentation and Methods for Astrophysics                      ***
+## subcategoryIntensive Care and Critical Care Medicine                         ***
+## subcategoryK-Theory and Homology                                             ***
+## subcategoryLogic                                                             ***
+## subcategoryLogic in Computer Science                                         ** 
+## subcategoryMachine Learning                                                     
+## subcategoryMaterials Science                                                 ** 
+## subcategoryMathematical Finance                                                 
+## subcategoryMathematical Physics                                              ***
+## subcategoryMathematical Software                                             ** 
+## subcategoryMedical Education                                                    
+## subcategoryMedical Physics                                                   ***
+## subcategoryMesoscale and Nanoscale Physics                                      
+## subcategoryMethodology                                                       ***
+## subcategoryMetric Geometry                                                   *  
+## subcategoryMicrobiology                                                      ***
+## subcategoryMolecular Biology                                                 ***
+## subcategoryMolecular Networks                                                ***
+## subcategoryMultiagent Systems                                                   
+## subcategoryMultimedia                                                        ***
+## subcategoryNephrology                                                        ***
+## subcategoryNetworking and Internet Architecture                                 
+## subcategoryNeural and Evolutionary Computing                                    
+## subcategoryNeurology                                                         ***
+## subcategoryNeurons and Cognition                                             ***
+## subcategoryNeuroscience                                                      ***
+## subcategoryNuclear Experiment                                                ** 
+## subcategoryNuclear Theory                                                       
+## subcategoryNumber Theory                                                     *  
+## subcategoryNumerical Analysis                                                   
+## subcategoryNutrition                                                         ***
+## subcategoryObstetrics and Gynecology                                         ***
+## subcategoryOccupational and Environmental Health                             ***
+## subcategoryOncology                                                          ***
+## subcategoryOperating Systems                                                    
+## subcategoryOperator Algebras                                                 ** 
+## subcategoryOphthalmology                                                     ***
+## subcategoryOptics                                                               
+## subcategoryOptimization and Control                                             
+## subcategoryOther Computer Science                                               
+## subcategoryOther Condensed Matter                                            .  
+## subcategoryOther Quantitative Biology                                        ** 
+## subcategoryOther Statistics                                                  ** 
+## subcategoryOtolaryngology                                                    .  
+## subcategoryPaleontology                                                         
+## subcategoryPathology                                                         ***
+## subcategoryPattern Formation and Solitons                                       
+## subcategoryPediatrics                                                        ***
+## subcategoryPerformance                                                          
+## subcategoryPharmacology and Therapeutics                                     ***
+## subcategoryPharmacology and Toxicology                                       ***
+## subcategoryPhysics and Society                                               ** 
+## subcategoryPhysics Education                                                 ***
+## subcategoryPhysiology                                                        ***
+## subcategoryPlant Biology                                                     ***
+## subcategoryPlasma Physics                                                    ** 
+## subcategoryPopular Physics                                                   *  
+## subcategoryPopulations and Evolution                                         ***
+## subcategoryPortfolio Management                                                 
+## subcategoryPricing of Securities                                             ** 
+## subcategoryPrimary Care Research                                             ***
+## subcategoryProbability                                                          
+## subcategoryProgramming Languages                                             ***
+## subcategoryPsychiatry and Clinical Psychology                                ***
+## subcategoryPublic and Global Health                                          ***
+## subcategoryQuantitative Methods                                              ***
+## subcategoryQuantum Algebra                                                      
+## subcategoryQuantum Gases                                                     ** 
+## subcategoryQuantum Physics                                                   .  
+## subcategoryRadiology and Imaging                                             ***
+## subcategoryRehabilitation Medicine and Physical Therapy                      ***
+## subcategoryRepresentation Theory                                             .  
+## subcategoryRespiratory Medicine                                              ***
+## subcategoryRheumatology                                                      ***
+## subcategoryRings and Algebras                                                ***
+## subcategoryRisk Management                                                      
+## subcategoryRobotics                                                             
+## subcategoryScientific Communication and Education                            ***
+## subcategorySexual and Reproductive Health                                    ***
+## subcategorySignal Processing                                                 *  
+## subcategorySocial and Information Networks                                   ***
+## subcategorySoft Condensed Matter                                             *  
+## subcategorySoftware Engineering                                                 
+## subcategorySolar and Stellar Astrophysics                                    ***
+## subcategorySound                                                                
+## subcategorySpace Physics                                                     *  
+## subcategorySpectral Theory                                                      
+## subcategorySports Medicine                                                      
+## subcategoryStatistical Finance                                                  
+## subcategoryStatistical Mechanics                                             ***
+## subcategoryStatistics Theory                                                 ** 
+## subcategoryStrongly Correlated Electrons                                     .  
+## subcategorySubcellular Processes                                             ***
+## subcategorySuperconductivity                                                    
+## subcategorySurgery                                                           ** 
+## subcategorySymbolic Computation                                                 
+## subcategorySymplectic Geometry                                                  
+## subcategorySynthetic Biology                                                 ***
+## subcategorySystems and Control                                                  
+## subcategorySystems Biology                                                   ***
+## subcategoryTheoretical Economics                                             ** 
+## subcategoryTissues and Organs                                                ***
+## subcategoryTrading and Market Microstructure                                    
+## subcategoryZoology                                                           ***
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+## 
+## (Dispersion parameter for binomial family taken to be 1)
+## 
+##     Null deviance: 60104.1  on 3472  degrees of freedom
+## Residual deviance:  5088.7  on 3258  degrees of freedom
+## AIC: 24195
+## 
+## Number of Fisher Scoring iterations: 4
+

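Ojo, que ahora los coeficientes están en escala logit (log-odds; su exponencial es un odds ratio). Podemos transformar el intercept a la unidad original (una proporción) con la función logística inversa: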

+
# Inverse logit: map the intercept from the log-odds scale back to a
+# proportion. plogis() is the numerically stable equivalent of
+# exp(x) / (exp(x) + 1), which becomes NaN once exp(x) overflows.
+plogis(coef(fit_glm)[1])
+
## (Intercept) 
+##   0.8570044
+

No está mal. Quizás un poco alto. Y los efectos:

+
sjPlot::plot_model(fit_glm)
+

+
+
+

2.2.2 glmer

+

Y ya todo junto: binomial fraccional de efectos mixtos. Hay que escalar el mes o si no se queja (también se puede cambiar por el año, no hay mucha diferencia):

+
# Mixed-effects fractional binomial (logit) model of the male proportion:
+# fixed effects for the scaled (not centred) month and the two COVID
+# indicators, random intercepts for category and for subcategory nested
+# within category. `total` supplies the binomial weights.
+fit_glmer <- glmer(
+  formula = r_male ~ scale(month, FALSE) + covid + covidpaper +
+    (1 | category/subcategory),
+  data = df.agg,
+  family = binomial,
+  weights = total
+)
+

Tenemos:

+
model_performance(fit_glmer)
+
## `geom_smooth()` using formula 'y ~ x'
+

+
## pseudo-R2 = 0.79687
+

Similar, y:

+
model_assumptions(fit_glmer)
+
## `geom_smooth()` using formula 'y ~ x'
+

+

Similar. Finalmente:

+
summary(fit_glmer)
+
## Generalized linear mixed model fit by maximum likelihood (Laplace
+##   Approximation) [glmerMod]
+##  Family: binomial  ( logit )
+## Formula: 
+## r_male ~ scale(month, FALSE) + covid + covidpaper + (1 | category/subcategory)
+##    Data: df.agg
+## Weights: total
+## 
+##      AIC      BIC   logLik deviance df.resid 
+##  24734.5  24771.4 -12361.3  24722.5     3467 
+## 
+## Scaled residuals: 
+##     Min      1Q  Median      3Q     Max 
+## -5.5921 -0.8027  0.0669  0.8833  6.0866 
+## 
+## Random effects:
+##  Groups               Name        Variance Std.Dev.
+##  subcategory:category (Intercept) 0.05536  0.2353  
+##  category             (Intercept) 0.18635  0.4317  
+## Number of obs: 3473, groups:  subcategory:category, 219; category, 8
+## 
+## Fixed effects:
+##                      Estimate Std. Error z value Pr(>|z|)    
+## (Intercept)          1.507958   0.154624   9.752  < 2e-16 ***
+## scale(month, FALSE) -0.094377   0.005458 -17.290  < 2e-16 ***
+## covidTRUE            0.020527   0.005823   3.525 0.000423 ***
+## covidpaperTRUE       0.072710   0.015417   4.716  2.4e-06 ***
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+## 
+## Correlation of Fixed Effects:
+##             (Intr) s(,FAL covdTRUE
+## scl(,FALSE) -0.030                
+## covidTRUE    0.010 -0.620         
+## covdpprTRUE -0.004  0.002 -0.128
+

Esta es la estimación del intercept más ajustada hasta ahora:

+
# Inverse logit of the fixed-effect intercept: log-odds -> proportion.
+# plogis() avoids evaluating exp() twice and does not overflow to NaN for
+# large log-odds the way exp(x) / (exp(x) + 1) does.
+plogis(fixef(fit_glmer)[1])
+
## (Intercept) 
+##   0.8187583
+

Nice! Efectos fijos:

+
sjPlot::plot_model(fit_glmer) + ylim(0.7, 1.3)
+
## Scale for 'y' is already present. Adding another scale for 'y', which will
+## replace the existing scale.
+

+

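Y los coeficientes COVID siguen saliendo positivos. Lo que me escama es que se ve una bajada en las gráficas de arriba. La media de la proporción de hombres para 2020 es más baja, pero las variables COVID salen siempre positivas (!). ¿Por qué? Una posible explicación: el modelo ya incluye una tendencia temporal negativa (el coeficiente del mes), así que los coeficientes COVID miden la desviación respecto a esa tendencia y no respecto a la media de los años anteriores.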

+

Efectos aleatorios:

+
sjPlot::plot_model(fit_glmer, "re")[[2]]
+

+
sjPlot::plot_model(fit_glmer, "re")[[1]]
+

+
+
+
+

2.3 Postre

+

¿Qué pasa si usamos categorías (demasiado gruesas) más otra variable predictiva (la que nos hemos dejado todo este rato)?

+
# Same fractional binomial (logit) model, but with `diss` as an extra fixed
+# effect and a random intercept for category only (no subcategory level).
+# `total` supplies the binomial weights.
+fit_glmer2 <- glmer(
+  formula = r_male ~ scale(month, FALSE) + covid + covidpaper + diss +
+    (1 | category),
+  data = df.agg,
+  family = binomial,
+  weights = total
+)
+

Esto es interesante:

+
model_performance(fit_glmer2)
+
## `geom_smooth()` using formula 'y ~ x'
+

+
## pseudo-R2 = 0.7022666
+

Siguen saliendo dos modos, dos clusters, pero se explica bastante más variabilidad.

+
model_assumptions(fit_glmer2)
+
## `geom_smooth()` using formula 'y ~ x'
+

+

Los residuales siguen siendo fetén, porque el problema es el tipo de modelo (binomial fraccional vs. gaussiano). ¿Adivináis qué pasa con los coeficientes COVID?

+
summary(fit_glmer2)
+
## Generalized linear mixed model fit by maximum likelihood (Laplace
+##   Approximation) [glmerMod]
+##  Family: binomial  ( logit )
+## Formula: r_male ~ scale(month, FALSE) + covid + covidpaper + diss + (1 |  
+##     category)
+##    Data: df.agg
+## Weights: total
+## 
+##      AIC      BIC   logLik deviance df.resid 
+##  27854.5  27891.4 -13921.3  27842.5     3467 
+## 
+## Scaled residuals: 
+##     Min      1Q  Median      3Q     Max 
+## -6.5889 -1.0988 -0.0062  1.0450  5.0789 
+## 
+## Random effects:
+##  Groups   Name        Variance Std.Dev.
+##  category (Intercept) 0.04472  0.2115  
+## Number of obs: 3473, groups:  category, 8
+## 
+## Fixed effects:
+##                      Estimate Std. Error z value Pr(>|z|)    
+## (Intercept)          0.388100   0.076254   5.090 3.59e-07 ***
+## scale(month, FALSE) -0.091253   0.005415 -16.851  < 2e-16 ***
+## covidTRUE            0.020280   0.005797   3.498 0.000469 ***
+## covidpaperTRUE       0.063838   0.013987   4.564 5.02e-06 ***
+## diss                 1.942396   0.023573  82.400  < 2e-16 ***
+## ---
+## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+## 
+## Correlation of Fixed Effects:
+##             (Intr) s(,FAL covdTRUE cvdpTRUE
+## scl(,FALSE) -0.065                         
+## covidTRUE    0.025 -0.625                  
+## covdpprTRUE -0.011  0.011 -0.139           
+## diss        -0.173  0.022 -0.015   -0.001
+

Siguen saliendo positivos. Lo que pasa ahora es que el índice de disimilitud (la variable diss) se ha llevado medio intercept (!).

+
+
+ + + +
+
+ +
+ + + + + + + + + + + + + + + + diff --git a/old/iu_features.R b/old/iu_features.R new file mode 100644 index 0000000..7fec279 --- /dev/null +++ b/old/iu_features.R @@ -0,0 +1,35 @@ +source("iu_clean.R") + +# covid, not covid +text[, covidpaper := grepl("covid-19", title, ignore.case=TRUE) | + grepl("sars-cov-2", title, ignore.case=TRUE) | + grepl("coronavirus", title, ignore.case=TRUE)] + +merge_info <- function(data) { + df <- merge(data, articles, by="id") + df <- merge(df, text[, c("id", "covidpaper")], by="id") + df <- df[!duplicated(df)] + df <- na.omit(df) + df[, `:=`( + year = lubridate::year(date), + month = lubridate::month(date), + week = lubridate::week(date) + )] + df[year < 2020, covidpaper := FALSE] + # remove subcategories without past data + df <- df[, minyear := min(year)][minyear < 2020][, minyear := NULL] + df +} + +# features all +df <- merge_info(authors[, .( + n_male = sum(gender == "male", na.rm=TRUE), + n_female = sum(gender == "female", na.rm=TRUE), + n_na = sum(is.na(gender)) +), by=id]) + +# features first +df.first <- merge_info(authors[alphabetical_ordered==FALSE & rank=="first"]) + +# features last +df.last <- merge_info(authors[alphabetical_ordered==FALSE & rank=="last"]) diff --git a/old/iu_subcategories.Rmd b/old/iu_subcategories.Rmd new file mode 100644 index 0000000..ba1ac8c --- /dev/null +++ b/old/iu_subcategories.Rmd @@ -0,0 +1,58 @@ +--- +title: "Dealing with subcategories" +author: "Iñaki Úcar" +output: + html_document: + df_print: paged + number_sections: true + toc: true + toc_float: + collapsed: false + smooth_scroll: false +--- + +```{r} +source("iu_clean.R") +``` + +Approach alternativo para obtener subcategorías _razonables_ (i.e., no demasiado generales, no demasiado particulares): + +1. Creo un ranking de subcategorías contando cuántas veces aparecen en distintos papers. Las subcategorías que aparezcan en más papers serán más generales. 
+ +```{r} +socarxiv <- articles[repository == "socarxiv"] +socarxiv.v <- socarxiv[, .(weight=.N), by=subcategory][order(weight, decreasing=TRUE)] +socarxiv.v +``` + +2. Utilizo ese ranking para quedarme solo con la subcategoría más general de cada paper, y con esto veo cuántas subcategorías me quedan. + +```{r} +get_main <- function(data, del, all) { + data[!subcategory %in% del][ + , .(main = all[all %in% subcategory][1]), by=id][ + , .N, by=main][order(N, decreasing=TRUE)] +} + +get_main(socarxiv, NULL, socarxiv.v$subcategory) +``` + +Dividiríamos todos los papers en solo `r nrow(.Last.value)` subcategorías. En total, socarxiv tiene `r length(unique(socarxiv$id))` papers. Por tanto, está claro que "Social and Behavioural Sciences" es demasiado general. Si la borramos y repetimos el proceso, obtenemos: + +```{r} +del <- "Social and Behavioral Sciences" +get_main(socarxiv, del, socarxiv.v$subcategory) +``` + +Ahora `r nrow(.Last.value)` subcategorías parece mucho más razonable. Nos podemos quedar aquí o podemos seguir un poco más. Podemos partir "Sociology" y "Art and Humanities". Además, dado que tenemos "Economics" y "Psychology" en otros repositorios, podemos partirlas también para ver qué nuevas subcategorías aparecen. 
A continuación se pintan las nuevas subcategorías que aparecen a cada paso: + +```{r} +last <- get_main(socarxiv, del, socarxiv.v$subcategory)$main +del <- c("Social and Behavioral Sciences", "Sociology", "Arts and Humanities", "Psychology", "Economics") +for (i in seq_along(del)[-1]) { + sel <- get_main(socarxiv, del[1:i], socarxiv.v$subcategory) + cat("Al eliminar", del[i], "aparece:\n") + print(setdiff(sel$main, last)); cat("\n") + last <- sel$main +} +``` diff --git a/old/iu_subcategories.html b/old/iu_subcategories.html new file mode 100644 index 0000000..0a1d12d --- /dev/null +++ b/old/iu_subcategories.html @@ -0,0 +1,3007 @@ + + + + + + + + + + + + + + +Dealing with subcategories + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+
+
+
+ +
+ + + + + + + +
source("iu_clean.R")
+

Approach alternativo para obtener subcategorías razonables (i.e., no demasiado generales, no demasiado particulares):

+
    +
  1. Creo un ranking de subcategorías contando cuántas veces aparecen en distintos papers. Las subcategorías que aparezcan en más papers serán más generales.
  2. +
+
socarxiv <- articles[repository == "socarxiv"]
+socarxiv.v <- socarxiv[, .(weight=.N), by=subcategory][order(weight, decreasing=TRUE)]
+socarxiv.v
+
+ +
+
    +
  1. Utilizo ese ranking para quedarme solo con la subcategoría más general de cada paper, y con esto veo cuántas subcategorías me quedan.
  2. +
+
# Count papers per "main" subcategory.
+#
+# data: data.table of papers; its `id` and `subcategory` columns are used.
+# del:  subcategories to drop before picking a main one (NULL drops none).
+# all:  candidate subcategories in priority order; for each paper, the first
+#       candidate found among its remaining subcategories becomes `main`.
+#
+# Returns one row per `main` with the paper count `N`, largest count first.
+get_main <- function(data, del, all) {
+  kept <- data[!subcategory %in% del]
+  main_by_paper <- kept[, .(main = all[all %in% subcategory][1]), by=id]
+  tally <- main_by_paper[, .N, by=main]
+  tally[order(N, decreasing=TRUE)]
+}
+
+get_main(socarxiv, NULL, socarxiv.v$subcategory)
+
+ +
+

Dividiríamos todos los papers en solo un puñado de subcategorías (tantas como filas tiene el resultado anterior). En total, socarxiv tiene 2375 papers. Por tanto, está claro que “Social and Behavioral Sciences” es demasiado general. Si la borramos y repetimos el proceso, obtenemos:

+
del <- "Social and Behavioral Sciences"
+get_main(socarxiv, del, socarxiv.v$subcategory)
+
+ +
+

Ahora el número de subcategorías (las filas del resultado anterior) parece mucho más razonable. Nos podemos quedar aquí o podemos seguir un poco más. Podemos partir “Sociology” y “Arts and Humanities”. Además, dado que tenemos “Economics” y “Psychology” en otros repositorios, podemos partirlas también para ver qué nuevas subcategorías aparecen. A continuación se pintan las nuevas subcategorías que aparecen a cada paso:

+
# Baseline: main subcategories obtained with the current `del`.
+last <- get_main(socarxiv, del, socarxiv.v$subcategory)$main
+# Broad subcategories to remove cumulatively, in this order.
+del <- c(
+  "Social and Behavioral Sciences", "Sociology", "Arts and Humanities",
+  "Psychology", "Economics"
+)
+# From the second entry on, remove the first i entries of `del` and print
+# the main subcategories that were not present in the previous step.
+for (i in seq_along(del)[-1]) {
+  sel <- get_main(socarxiv, del[seq_len(i)], socarxiv.v$subcategory)
+  cat("Al eliminar", del[i], "aparece:\n")
+  print(setdiff(sel$main, last))
+  cat("\n")
+  last <- sel$main
+}
+
## Al eliminar Sociology aparece:
+##  [1] "Inequality, Poverty, and Mobility"                           
+##  [2] "Population"                                                  
+##  [3] "Sex and Gender"                                              
+##  [4] "Economic Sociology"                                          
+##  [5] "Family"                                                      
+##  [6] "Culture"                                                     
+##  [7] "Crime, Law, and Deviance"                                    
+##  [8] "Collective Behavior and Social Movements"                    
+##  [9] "Sociology of Education"                                      
+## [10] "Political Sociology"                                         
+## [11] "Methodology"                                                 
+## [12] "Racial and Ethnic Minorities"                                
+## [13] "Organizations, Occupations, and Work"                        
+## [14] "Children and Youth"                                          
+## [15] "Communication, Information Technologies, and Media Sociology"
+## [16] "Science, Knowledge, and Technology"                          
+## [17] "Sociology of Religion"                                       
+## [18] "Other Sociology"                                             
+## [19] "Sexualities"                                                 
+## [20] "Theory"                                                      
+## [21] "International Migration"                                     
+## [22] "Environmental Sociology"                                     
+## [23] "Race, Gender, and Class"                                     
+## [24] "Community and Urban Sociology"                               
+## [25] "Medical Sociology"                                           
+## [26] "Aging and the Life Course"                                   
+## [27] "Disability and Society"                                      
+## [28] "Mathematical Sociology"                                      
+## [29] "Social Psychology and Interaction"                           
+## [30] "Global and Transnational Sociology"                          
+## [31] "Animals and Society"                                         
+## [32] "Development"                                                 
+## [33] "Alcohol, Drugs, Tobacco"                                     
+## [34] "Comparative and Historical Sociology"                        
+## [35] "Altruism, Morality, and Social Solidarity"                   
+## [36] "Sociological Practice and Public Sociology"                  
+## [37] "Labor and Labor Movements"                                   
+## [38] "Rationality and Society"                                     
+## [39] "Consumers and Consumption"                                   
+## 
+## Al eliminar Arts and Humanities aparece:
+##  [1] "English Language and Literature"                  
+##  [2] "Philosophy"                                       
+##  [3] "History"                                          
+##  [4] "History of Art, Architecture, and Archaeology"    
+##  [5] "Digital Humanities"                               
+##  [6] "Film and Media Studies"                           
+##  [7] "Religion"                                         
+##  [8] "Music"                                            
+##  [9] "Medieval Studies"                                 
+## [10] "Photography"                                      
+## [11] "Art Practice"                                     
+## [12] "African Languages and Societies"                  
+## [13] "Fine Arts"                                        
+## [14] "Animal Studies"                                   
+## [15] "Feminist, Gender, and Sexuality Studies"          
+## [16] "Other Arts and Humanities"                        
+## [17] "South and Southeast Asian Languages and Societies"
+## [18] "Comparative Literature"                           
+## [19] "Food Studies"                                     
+## [20] "Near Eastern Languages and Societies"             
+## [21] "Latin American Languages and Societies"           
+## [22] "Classics"                                         
+## [23] "Race, Ethnicity and Post-Colonial Studies"        
+## [24] "Television"                                       
+## [25] "American Studies"                                 
+## [26] "Art and Design"                                   
+## 
+## Al eliminar Psychology aparece:
+##  [1] "Social Psychology"                       
+##  [2] "Cognition and Perception"                
+##  [3] "Health Psychology"                       
+##  [4] "Theory and Philosophy"                   
+##  [5] "Child Psychology"                        
+##  [6] "Biological Psychology"                   
+##  [7] "Clinical Psychology"                     
+##  [8] "Quantitative Psychology"                 
+##  [9] "Industrial and Organizational Psychology"
+## [10] "Cognitive Psychology"                    
+## [11] "Developmental Psychology"                
+## 
+## Al eliminar Economics aparece:
+##  [1] "Finance"                 "Political Economy"      
+##  [3] "Behavioral Economics"    "Growth and Development" 
+##  [5] "Labor Economics"         "Other Economics"        
+##  [7] "Econometrics"            "Economic Theory"        
+##  [9] "Economic History"        "Health Economics"       
+## [11] "Macroeconomics"          "Public Economics"       
+## [13] "Industrial Organization" "Income Distribution"    
+## [15] "Regional Economics"      "International Economics"
+## [17] "Mental Health"
+ + + +
+
+ +
+ + + + + + + + + + + + + + + +