Skip to content

Commit

Permalink
Update raw builds (#7)
Browse files Browse the repository at this point in the history
* Switch from devtools to usethis for `use_data()`.

Explicitly set `header` if variable names are present in file.

* Re-build Rdas
  • Loading branch information
coatless committed Jul 18, 2024
1 parent 724bc4e commit fbc9977
Show file tree
Hide file tree
Showing 26 changed files with 42 additions and 36 deletions.
2 changes: 1 addition & 1 deletion data-raw/abalone_build.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ colnames(abalone) = c("sex",
"rings")

# Save dataset
devtools::use_data(abalone)
usethis::use_data(abalone, overwrite = TRUE)
3 changes: 2 additions & 1 deletion data-raw/autoimports_build.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ var_names_safe = gsub("-", "_", var_names)

colnames(autoimports) = var_names_safe

devtools::use_data(autoimports, overwrite = TRUE)
usethis::use_data(autoimports, overwrite = TRUE)



5 changes: 3 additions & 2 deletions data-raw/autompg_build.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@ autompg = read.table(
"http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data",
quote = "\"",
comment.char = "",
stringsAsFactors = FALSE)
stringsAsFactors = FALSE,
header = FALSE)

colnames(autompg) = c("mpg", "cylinders", "displacement", "horsepower",
"weight", "acceleration", "model_year", "origin", "car_name")

devtools::use_data(autompg)
usethis::use_data(autompg, overwrite = TRUE)
2 changes: 1 addition & 1 deletion data-raw/bcw_original_build.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ bcw_original = breast_cancer_wis_data

rm(list="breast_cancer_wis_data")

devtools::use_data(bcw_original, overwrite = TRUE)
usethis::use_data(bcw_original, overwrite = TRUE)
44 changes: 23 additions & 21 deletions data-raw/bike_sharing_daily_build.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,28 @@ download.file("http://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bi

# Unzip and load bike sharing data into R
# Note, data has a header in it!
bike_sharing_daily = read.csv(unz("data-raw/Bike-Sharing-Dataset.zip",
"day.csv"),
colClasses = c("character", # instant
"Date", # dteday
"factor", # season
"factor", # yr
"factor", # mnth
"factor", # holiday
"factor", # weekday
"factor", # workingday
"factor", # weathersit
"numeric", # temp
"numeric", # atemp
"numeric", # hum
"numeric", # windspeed
"integer", # casual
"integer", # registered
"integer" # cnt
)
)
bike_sharing_daily = read.csv(
unz("data-raw/Bike-Sharing-Dataset.zip", "day.csv"),
header = TRUE,
colClasses = c(
"character", # instant
"Date", # dteday
"factor", # season
"factor", # yr
"factor", # mnth
"factor", # holiday
"factor", # weekday
"factor", # workingday
"factor", # weathersit
"numeric", # temp
"numeric", # atemp
"numeric", # hum
"numeric", # windspeed
"integer", # casual
"integer", # registered
"integer" # cnt
)
)

# Improve factor labels
bike_sharing_daily = within(bike_sharing_daily, {
Expand All @@ -52,7 +54,7 @@ bike_sharing_daily = within(bike_sharing_daily, {
# })

# Write the bike_sharing_daily dataset
devtools::use_data(bike_sharing_daily, overwrite = TRUE)
usethis::use_data(bike_sharing_daily, overwrite = TRUE)

# Remove the zip + csv after read in.
file.remove("data-raw/Bike-Sharing-Dataset.zip")
2 changes: 1 addition & 1 deletion data-raw/bridges_build.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,5 @@ bridges = within(bridges, {
lanes = factor(lanes)
})

devtools::use_data(bridges, overwrite = TRUE)
usethis::use_data(bridges, overwrite = TRUE)

2 changes: 1 addition & 1 deletion data-raw/car_eval_build.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ colnames(car_eval) = c("buying",
"safety",
"class_value")

devtools::use_data(car_eval, overwrite = TRUE)
usethis::use_data(car_eval, overwrite = TRUE)
2 changes: 1 addition & 1 deletion data-raw/forest_fires_build.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ url_forest_fires = "https://archive.ics.uci.edu/ml/machine-learning-databases/fo

forest_fires = read.csv(url_forest_fires, header = TRUE)

devtools::use_data(forest_fires, overwrite = TRUE)
usethis::use_data(forest_fires, overwrite = TRUE)
2 changes: 1 addition & 1 deletion data-raw/glass_build.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,4 @@ glass = within(glass, {
})

# Save dataset
devtools::use_data(glass, overwrite = TRUE)
usethis::use_data(glass, overwrite = TRUE)
8 changes: 5 additions & 3 deletions data-raw/hepatitis_build.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@

url_hepatitis = "http://archive.ics.uci.edu/ml/machine-learning-databases/hepatitis/hepatitis.data"

hepatitis = read.csv(url_hepatitis,
header = FALSE, na.strings = "?")
hepatitis = read.csv(
url_hepatitis,
header = FALSE, na.strings = "?"
)

# Columns taken verbatim from ML page
# Regex search with: [0-9]{1,2}\. (.*):.*
Expand Down Expand Up @@ -46,7 +48,7 @@ hepatitis = within(hepatitis,{
sex = factor(sex, labels = c("Male", "Female"))
})

devtools::use_data(hepatitis, overwrite = TRUE)
usethis::use_data(hepatitis, overwrite = TRUE)

## output colnames
cat(paste0(colnames(hepatitis),"\n"), sep="")
6 changes: 3 additions & 3 deletions data-raw/wine_build.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ red_wine_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-
white_wine_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv"

# Note the .csv uses a `;` as the separator, not `,`
red_wine_data = read.csv(red_wine_url, sep = ";")
white_wine_data = read.csv(white_wine_url, sep = ";")
red_wine_data = read.csv(red_wine_url, sep = ";", header = TRUE)
white_wine_data = read.csv(white_wine_url, sep = ";", header = TRUE)

# Load in Red vs. White Data
red_wine_data$color = "Red"
Expand All @@ -22,4 +22,4 @@ wine$color = as.factor(wine$color)
# Remove periods
colnames(wine) = gsub("\\.", "_", colnames(wine))

devtools::use_data(wine)
usethis::use_data(wine, overwrite = TRUE)
Binary file modified data/abalone.rda
Binary file not shown.
Binary file modified data/autoimports.rda
Binary file not shown.
Binary file modified data/autompg.rda
Binary file not shown.
Binary file modified data/bcw_original.rda
Binary file not shown.
Binary file modified data/bike_sharing_daily.rda
Binary file not shown.
Binary file modified data/bridges.rda
Binary file not shown.
Binary file modified data/car_eval.rda
Binary file not shown.
Binary file modified data/forest_fires.rda
Binary file not shown.
Binary file modified data/glass.rda
Binary file not shown.
Binary file modified data/heart_disease_ch.rda
Binary file not shown.
Binary file modified data/heart_disease_cl.rda
Binary file not shown.
Binary file modified data/heart_disease_hu.rda
Binary file not shown.
Binary file modified data/heart_disease_va.rda
Binary file not shown.
Binary file modified data/hepatitis.rda
Binary file not shown.
Binary file modified data/wine.rda
Binary file not shown.

0 comments on commit fbc9977

Please sign in to comment.