Skip to content

Latest commit





Sarah Leo from The Economist went through the Economist's archives and found 7 examples of charts that were in need of improvement.

"I grouped our crimes against data visualisation into three categories: charts that are (1) misleading, (2) confusing and (3) failing to make a point. For each, I suggest an improved version that requires a similar amount of space — an important consideration when drawing charts to be published in print."

She was kind enough to share the raw data as .csv files, and I have included both the raw and the tidied versions for your graphing fun!

Get the data!

# Each statement below reads one tidied dataset with readr::read_csv().
# NOTE(review): every path argument is an empty string in this copy -- the
# file URLs appear to have been lost; supply the real locations before running.
brexit <- readr::read_csv("")

corbyn <- readr::read_csv("")

dogs <- readr::read_csv("")

eu_balance <- readr::read_csv("")

pensions <- readr::read_csv("")

trade <- readr::read_csv("")

women_research <- readr::read_csv("")

Data Dictionaries


variable class description
date date Date of poll
percent_responding_right character Percent who said Britain's vote to leave the EU was right
percent_responding_wrong character Percent who said Britain's vote to leave the EU was wrong


variable class description
political_group character Political identity or group
avg_facebook_likes double Average number of Facebook likes per Facebook post in 2016


variable class description
year integer Year registered with UK's kennel club
avg_weight character Average body weight in kg
avg_neck double Average neck size diameter in cm


variable class description
country character Country
account_type character Budget balances or current-account balances
year integer Year
value double Value in billions of euros


variable class description
country character Country
pop_65_percent double Percent of population aged 65 or older
gov_spend_percent_gdp double Government spending on pension benefits as a percent of GDP


variable class description
year integer Year
trade_deficit double US Trade deficit with China in goods in dollars
manufacture_employment double Manufacturing employment in the US


variable class description
country character Country
field character Field of study
percent_women character Women among researchers with papers published 2011-15 as % of total by field of study

Cleaning Spoilers


### Brexit Raw

# Read the raw Economist export. The cleaning step below takes its column
# names from the third row of this table.
brexit_raw <- read_csv(here("2019", "2019-04-16", "Economist_brexit.csv"))

# Promote row 3 to column names, normalise them with clean_names(), then drop
# the rows up to and including that header row.
# The pipeline previously ended on a dangling `%>%`, which piped the result
# into the write step below and made the script fail.
# NOTE(review): the original final step is missing from this copy; dropping
# rows 1-3 is inferred from row 3 being used as the header -- confirm against
# the raw file.
brexit_clean <- brexit_raw %>% 
  set_names(nm = as.character(.[3, ])) %>% 
  clean_names() %>% 
  .[-(1:3), ]

# Persist the tidied table next to the raw file.
brexit_clean %>% write_csv(here("2019", "2019-04-16", "brexit.csv"))

### corbyn

# Read the raw Economist export.
corbyn_raw <- read_csv(here("2019", "2019-04-16", "Economist_corbyn.csv"))

# Give the two columns their tidy names. The names are passed as a single
# character vector, matching the other cleaning steps in this script.
# The pipeline previously ended on a dangling `%>%`, which piped the result
# into the write step below and made the script fail.
# NOTE(review): a trailing step (possibly a row filter) appears to be missing
# from this copy -- compare the output with the published corbyn.csv.
corbyn_clean <- corbyn_raw %>% 
  set_names(nm = c("political_group", "avg_facebook_likes"))

# Persist the tidied table next to the raw file.
corbyn_clean %>% write_csv(here("2019", "2019-04-16", "corbyn.csv"))

### dogs

# Read the raw Economist export.
dogs_raw <- here("2019", "2019-04-16", "Economist_dogs.csv") %>% 
  read_csv()

# Drop every row that contains an NA, then apply the tidy column names.
dogs_clean <- set_names(
  na.omit(dogs_raw),
  nm = c("year", "avg_weight", "avg_neck")
)

# Persist the tidied table next to the raw file.
write_csv(dogs_clean, here("2019", "2019-04-16", "dogs.csv"))

### EU Balance

# Read the raw Economist export.
eu_balance_raw <- read_csv(here("2019", "2019-04-16", "Economist_eu-balance.csv"))

# Start from the first row of the raw table.
# NOTE(review): this pipeline ends on a dangling `%>%`; the remaining step(s)
# appear to be missing from this copy -- restore them before running.
names_eu <- eu_balance_raw %>% 
  .[1,] %>% 


# Column names applied to the raw table by the reshaping code further down,
# which filters on `country` and selects columns by the prefixes "current"
# and "budget".
# NOTE(review): this `c(` call is never closed here; the remaining elements
# appear to be missing from this copy -- presumably one "current..." and one
# "budget..." name per value column; confirm against the raw file.
clean_names_eu <- c("country",

# Apply the tidy column names and remove rows whose `country` is the literal
# "Country" once, so both reshapes below start from the same table.
eu_named <- eu_balance_raw %>% 
  set_names(nm = clean_names_eu) %>% 
  filter(country != "Country")

# Long format for the "current..." columns: stack them into year/value,
# discard the "budget..." columns, then split the former column name into
# account_type and year.
eu_current <- separate(
  select(
    gather(eu_named, year, value, starts_with("current")),
    -starts_with("budget")
  ),
  year,
  into = c("account_type", "year")
)

# Same reshape for the "budget..." columns, discarding the "current..." ones.
eu_budget <- separate(
  select(
    gather(eu_named, year, value, starts_with("budget")),
    -starts_with("current")
  ),
  year,
  into = c("account_type", "year")
)

# Stack both account types (current first, then budget) and write the result.
eu_balance_clean <- bind_rows(eu_current, eu_budget)

write_csv(eu_balance_clean, here("2019", "2019-04-16", "eu_balance.csv"))

### Pensions

# Read the raw Economist export.
pensions_raw <- read_csv(
  here("2019", "2019-04-16", "Economist_pensions.csv")
)

# Keep only rows without any NA, then apply the tidy column names.
pensions_complete <- na.omit(pensions_raw)
pensions_clean <- set_names(
  pensions_complete,
  nm = c("country", "pop_65_percent", "gov_spend_percent_gdp")
)

# Persist the tidied table next to the raw file.
write_csv(pensions_clean, here("2019", "2019-04-16", "pensions.csv"))

### Trade

# Read the raw Economist export.
trade_raw <- read_csv(here("2019", "2019-04-16", "Economist_us-trade-manufacturing.csv"))

# Apply the tidy column names, then rescale both measures: trade_deficit is
# multiplied by 1e9 and manufacture_employment by 1e6.
# The pipeline previously ended on a dangling `%>%`, which piped the result
# into the write step below and made the script fail.
# NOTE(review): a trailing step appears to be missing from this copy --
# compare the output with the published trade.csv.
trade_clean <- trade_raw %>% 
  set_names(nm = c("year", "trade_deficit", "manufacture_employment")) %>% 
  mutate(trade_deficit = trade_deficit * 1e9,
         manufacture_employment = manufacture_employment * 1e6)

# Persist the tidied table next to the raw file.
trade_clean %>% write_csv(here("2019", "2019-04-16", "trade.csv"))

### Women
# Read the raw Economist export.
women_research_raw <- read_csv(here("2019", "2019-04-16", "Economist_women-research.csv"))

# Inspect the first raw row as a character vector.
# This expression previously ended on a dangling `%>%`, which fused it with
# the assignment below into an invalid statement.
# NOTE(review): a further step appears to be missing from this copy.
women_research_raw[1, ] %>% 
  as.character()

# Tidy column names. Every name after "country" becomes a value of `field`
# in the long table below, so the strings are kept byte-for-byte.
# NOTE(review): set_names() needs exactly one name per raw column -- confirm
# that the raw file really has these five columns and none was dropped here.
research_names <- c("country",
  "Health sciences",
  "Physical sciences",
  "Computer science, maths",
  "Women inventores")

# Drop rows containing NA, apply the names, remove rows whose `country` is the
# literal "Country", then stack the field columns into field/percent_women.
women_research_clean <- women_research_raw %>% 
  na.omit() %>% 
  set_names(nm = research_names) %>% 
  filter(country != "Country") %>% 
  gather(field, percent_women, `Health sciences`:`Women inventores`)

# Persist the tidied table next to the raw file.
women_research_clean %>% write_csv(here("2019", "2019-04-16", "women_research.csv"))