-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path02a_model_stats_interp.R
74 lines (59 loc) · 2.27 KB
/
02a_model_stats_interp.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"
CSV to Latex:
a. Top 100 Coefs. for Soft/Hard
b. Generalization Error
"
# Set working dir.
# `githubdir` is not defined in the visible part of this script, so it must
# already exist in the calling environment. Every relative path used below
# ("uk_not_news/tabs/...") is resolved against this directory.
setwd(githubdir)
# Load libs
library(xtable)
library(tools)
library(readr)
library(dplyr)
# Read in data
top100_hard <- read.csv("uk_not_news/tabs/url_uk_top100_hard.csv")
top100_soft <- read.csv("uk_not_news/tabs/url_uk_top100_soft.csv")

# Arrange the first 100 terms column-wise into a 34 x 3 character matrix for
# the LaTeX table: entries 1-34, 35-68 and 69-100, with the last column padded
# by two empty cells so all three columns have equal length.
layout_top100 <- function(terms) {
  # as.character() guards against factor input (read.csv's default for strings
  # before R 4.0), which cbind() would otherwise reduce to integer codes.
  terms <- as.character(terms)
  tab <- cbind(terms[1:34], terms[35:68], c(terms[69:100], "", ""))
  # A matrix takes its column headers from colnames(); names() would attach
  # per-element names to the underlying vector instead.
  colnames(tab) <- c("", "", "")
  tab
}

# Subset and rename for final output
top100_hard_tab <- layout_top100(top100_hard$term)
top100_soft_tab <- layout_top100(top100_soft$term)
# Output to .tex
# Hard-news terms: build the xtable object first, then write it out as a
# LaTeX longtable without row or column headers.
hard_align <- c("p{0.10\\textwidth}", rep("p{0.3\\textwidth}", 3))
hard_xtab <- xtable(
  top100_hard_tab,
  caption = "Top 100 Predictors of Hard News",
  label = "tab:top_100_hard",
  align = hard_align
)
print(
  hard_xtab,
  file = "uk_not_news/tabs/url_uk_top100_hard.tex",
  type = "latex",
  tabular.environment = "longtable",
  floating = FALSE,
  table.placement = "!htb",
  caption.placement = "top",
  size = "\\small",
  include.rownames = FALSE,
  include.colnames = FALSE,
  # Cell text is passed through unchanged (no LaTeX escaping).
  sanitize.text.function = identity
)
# Soft-news terms: build the xtable object first, then write it out as a
# LaTeX longtable without row or column headers.
soft_align <- c("p{0.10\\textwidth}", rep("p{0.3\\textwidth}", 3))
soft_xtab <- xtable(
  top100_soft_tab,
  caption = "Top 100 Predictors of Soft News",
  label = "tab:top_100_soft",
  align = soft_align
)
print(
  soft_xtab,
  file = "uk_not_news/tabs/url_uk_top100_soft.tex",
  type = "latex",
  tabular.environment = "longtable",
  floating = FALSE,
  table.placement = "!htb",
  caption.placement = "top",
  size = "\\small",
  include.rownames = FALSE,
  include.colnames = FALSE,
  # Cell text is passed through unchanged (no LaTeX escaping).
  sanitize.text.function = identity
)
# Generalization Error Using Hand Coded Articles
test_samp_manual <- read_csv("uk_not_news/tabs/test_sample_coded.csv")
uk_pred <- read_csv("uk_not_news/tabs/uk_media_url_pred.csv")

# Zero-based row id used as the join key; seq_len() stays well-defined (empty)
# when the prediction file has no rows, unlike 0:(n - 1).
uk_pred$id <- seq_len(nrow(uk_pred)) - 1L

# Attach predictions to the hand-coded sample. Both inputs carry a `label`
# column, so the hand-coded one becomes `label.x` after the join.
test <- test_samp_manual %>%
  left_join(uk_pred, by = "id")

# Confusion Matrix (rows: predicted label, columns: hand-coded label).
# table() drops rows where either label is NA.
conf_mat <- table(test$pred_label, test$label.x)
conf_mat

# Misclassification rate over the rows that actually enter the confusion
# matrix. Dividing by sum(conf_mat) keeps numerator and denominator on the same
# set of rows even if the left join leaves some pred_label values missing.
# NOTE(review): diag() pairs row i with column i, so this assumes both label
# sets have the same levels in the same order -- confirm against the data.
1 - sum(diag(conf_mat)) / sum(conf_mat)
# 0.8770161
# NOTE(review): the figure above was recorded by the original author and has
# not been re-verified; it reads more like an accuracy than an error rate.