Skip to content

Latest commit



123 lines (95 loc) · 2.91 KB

File metadata and controls

123 lines (95 loc) · 2.91 KB


Exploratory Data Analysis on the fly ...

Coolest snippets and pearls

# Read only the listed columns (each with an explicit type) and drop the rest.
mtcars_spec_cols <- read_csv(
  file = "mtcars.csv",
  col_types = cols_only(
    hp = col_integer(),
    am = col_integer(),
    vs = col_factor(levels = c("1", "0"), include_na = FALSE)
  )
)
# nice way to keep only the rows of one data frame whose dates also occur in another data set
sent_q1 <- sent[as.POSIXct(sent$Date) %in% agg_q1$date,]
# changing column names the cool way
# NOTE(review): the pipeline below ends in a dangling `%>%`; the renaming step
# this note refers to appears to be missing. Restore it or drop the final pipe,
# otherwise the next expression in the file is consumed as the pipe's RHS.
survey_dt_associations <- survey_data %>%
    select(questions_table_one_EJA_paper, GDP_PER_CAPITA, digital_index, dichte) %>%


# tabulates the observations per weekday (labelled factor: Sun, Mon, ...) so they can be counted etc.
table(wday(r_downloads_year$date, label = TRUE))
# counting by weeks
count(week = floor_date(date, "week"))
# reordering variables in descending order, nice for graphs, not working if NAs are present
mutate(variable = fct_reorder(variable, reorder_by))

Horror movies EDA

alt text

# keep the 6 most common distributors; all remaining ones are lumped into "Other"
mutate(distributor = fct_lump(distributor, n = 6))
# turn the data frame upside down with row_number()
# extract the decade out of the release_date
10 * floor(year(movie_profit$release_date) / 10)
# summarise several variables at once: the median of every column in the range
# (across() replaces the superseded summarise_at(vars(...)) form)
summarise(across(production_budget:worldwide_gross, median))
# append the release year (in parentheses) to the end of the movie title
mutate(movie = paste0(movie, " (", year(release_date), " )"))


# custom labeling of the y or x axis ... damn cool 
scale_y_continuous(labels = function(x) paste0(x, "X"))

Profit gross

# 44:14 x axis should be on a free scale per facet and not shared across all facets
facet_wrap(~ distributor, scales = "free_x")
# format the y axis labels as percentages
scale_y_continuous(labels = percent_format())

Country codes

# converting two-letter ISO codes (e.g. ES, DE) into readable country names;
# the destination must be a valid code scheme, "country.name" gives English names
count(country = countrycode(country, "iso2c", "country.name"), sort = TRUE)