-
Notifications
You must be signed in to change notification settings - Fork 32
/
Copy path06_movies.Rmd
78 lines (58 loc) · 2.44 KB
/
06_movies.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
name: movies
# Movies
---
```{r, echo = F}
# Local cache location for the TidyTuesday movie-profit CSV.
dir <- "raw_data"
file <- file.path(dir, "movies.csv")
url <- "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2018/2018-10-23/movie_profit.csv"

# Make sure the cache folder is there before anything is written into it.
if (!dir.exists(dir)) {
  dir.create(dir)
}

# Download only when no local copy exists yet, so repeated knits
# reuse the cached file.
if (!file.exists(file)) {
  download.file(url = url, destfile = file)
}

# Import the cached CSV; later chunks start from `movies_0`.
movies_0 <- readr::read_csv(file)
```
```{r, echo = F, warning=F, message=F}
library(ggrepel)

# Build the plotting data from the raw import:
#   1. parse the month/day/year release-date strings with lubridate::mdy(),
#   2. drop rows whose worldwide gross is exactly zero,
#   3. add a text label: the title, a newline, then the production budget
#      in millions rounded to two decimals.
movies <- movies_0 %>%
  mutate(release_date = lubridate::mdy(release_date)) %>%
  filter(worldwide_gross != 0) %>%
  mutate(
    label = paste0(movie, "\n$", round(production_budget / 1e6, 2), "mil.")
  )
```
---
```{r movies, eval=F, echo=F, fig.width=12}
ggplot(data = movies) +
  aes(x = production_budget) +
  aes(y = worldwide_gross / production_budget) +
  geom_point(alpha = .25) +
  facet_wrap(~ genre) +
  theme_bw() +
  aes(col = lubridate::year(release_date)) +
  scale_color_viridis_c(option = "B") +
  labs(col = "") +
  scale_x_log10(labels = scales::dollar_format(),
                limits = c(100000, 100000000)) +
  scale_y_log10(breaks = c(.0001, .001, .01, .1, 1, 10, 100, 1000),
                labels = c(".0001X", ".001X", ".01X", ".1X", "1X", "10X", "100X", "1000X"),
                limits = c(.0001, 1000)) +
  aes(label = paste0(movie, "\n$", round(production_budget / 1000000, 2),
                     "mil.")) +
  ggpmisc::stat_dens2d_filter(data = movies %>% filter(worldwide_gross > 10 * production_budget),
                              geom = "text_repel", keep.fraction = 0.06,
                              size = 2.8, col = "darkgrey") +
  ggpmisc::stat_dens2d_filter(data = movies %>% filter(worldwide_gross < production_budget),
                              geom = "text_repel", keep.fraction = 0.002,
                              size = 2.8, col = "darkgrey") +
  geom_abline(intercept = 0, slope = 0, lty = 2, col = "darkgrey") +
  labs(x = "production budget") +
  labs(y = "Profit ratio (worldwide gross)") +
  labs(title = "Profit ratio in a 538 movies dataset") +
  labs(subtitle = "Data source: 538 via TidyTuesday | Vis: Gina Reynolds")
```
```{r, echo = F, warning=F, message=F, eval = T, fig.show='hide'}
# Name of the chunk to re-run; it is also used as the PNG file stem.
get_what_save_what <- "movies"

# The target chunk is declared with eval=F, so its source is pulled from
# knitr's chunk store and evaluated here instead. Lines are joined with
# newlines rather than "" so that a `#` comment, or a line that does not end
# in an operator, cannot swallow or fuse with the code on the following line.
chunk_code <- paste(
  knitr:::knit_code$get(get_what_save_what),
  collapse = "\n"
)
eval(parse(text = chunk_code))

# Create the output folder on first use, mirroring how raw_data/ is handled.
if (!dir.exists("figures")) {
  dir.create("figures")
}

# No `plot` argument is given, so ggsave() falls back to its default plot
# (the last ggplot created, i.e. the one just evaluated above).
ggsave(paste0("figures/", get_what_save_what, ".png"), dpi = 300)
```
`r apply_reveal("movies")`
---