Skip to content

Commit

Permalink
Finalized update and deleting terms
Browse files Browse the repository at this point in the history
  • Loading branch information
kwlyu committed Sep 9, 2024
1 parent 4980a9a commit 9107c9c
Show file tree
Hide file tree
Showing 6 changed files with 965 additions and 227 deletions.
318 changes: 318 additions & 0 deletions Event-Data-Analysis.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -706,3 +706,321 @@ term_start_dates <- data.frame(
write_csv(term_start_dates, "data/term_list.csv")
```





```{r}
# Folder that holds the per-term event CSV exports and the term lookup table.
data_dir <- "data/"
# Term lookup written out by the previous chunk.
term_list <- read_csv(paste0(data_dir, "term_list.csv"))
# Read every per-term event export, stack them, and derive the cleaned columns
# used for analysis.
#
# Args:
#   term_list: data frame with a `term` column (codes whose first character is
#     the season letter F/W/S and whose next two characters are the year,
#     e.g. "F15") and a `start_date` column used to number weeks within a term.
#
# Returns a named list:
#   combined_data_filtered  cleaned event rows with the derived columns
#                           department_type, event_type, year, term_category
#                           and week_of_term
#   event_summary           event counts per year/term plus per-year totals
#   year_choices            "All" followed by the distinct academic years
#   guest_only              MUSC "Guest / Masterclass" rows whose title does
#                           not contain "Masterclass"
#
# Reads the global `data_dir` to locate the CSV exports.
dataWranglingfn <- function(term_list) {
  # Term start dates come from the argument, not from a same-named global.
  term_start_dates <- term_list

  # Titles matching this pattern are treated as CSA (student activity) events,
  # both when assigning the department and when assigning the event type.
  csa_pattern <- "CSA|Just Cellin|Lunar New Year|ACA|A Cappella|Accidentals|Exit 69|Date Knight|Knights|Knightingales|International Festival"

  # Keep purely numeric entries and turn everything else into NA.
  numeric_or_na <- function(x) {
    as.numeric(ifelse(grepl("^[0-9]+$", x), x, NA))
  }

  # Read one term's export, normalise the column names, force every column to
  # character and tag each row with its term code.
  read_and_clean_event_file <- function(code) {
    file_path <- paste0(data_dir, "2015-2024 Events Data - ", code, " - Event Data.csv")
    read_csv(file_path) %>%
      clean_names() %>%
      mutate(across(everything(), as.character)) %>%
      mutate(term = code)
  }

  # Map a term code to its academic-year label: a fall term opens the year
  # ("F15" -> "2015-2016"), any other term closes it ("W16" -> "2015-2016").
  term_to_year <- function(term) {
    year <- as.numeric(str_sub(term, 2, 3))
    start_year <- if_else(str_sub(term, 1, 1) == "F", 2000 + year, 2000 + year - 1)
    paste0(start_year, "-", start_year + 1)
  }

  # Week number of each event within its term, counting Monday-based weeks
  # with the term's starting week as week 1. Vectorised over both arguments;
  # a term with no entry in `term_start_dates` yields NA.
  calculate_week_of_term <- function(event_date, term) {
    term_row <- match(as.character(term), as.character(term_start_dates[["term"]]))
    start_date <- term_start_dates[["start_date"]][term_row]
    event_week <- floor_date(event_date, unit = "week", week_start = 1)
    start_week <- floor_date(start_date, unit = "week", week_start = 1)
    as.integer((event_week - start_week) / 7) + 1
  }

  # Each file carries its own `term` value, so a full join on all shared
  # columns can never match rows across files; stacking is the same result
  # without the join messages.
  combined_data <- term_list$term %>%
    map(read_and_clean_event_file) %>%
    bind_rows()

  combined_data_filtered <- combined_data %>%
    # Drop untitled rows and rehearsals.
    filter(!is.na(what), what != "") %>%
    filter(!what %in% c("Choir & Jazz Rehearsal", "Jazz Rehearsal")) %>%
    mutate(
      date = ymd(date),
      # If 'livestream' is NA, use 'live_stream'.
      livestream = coalesce(livestream, live_stream)
    ) %>%
    select(-live_stream) %>%
    mutate(department = ifelse(str_detect(what, csa_pattern), "CSA", department)) %>%
    mutate(
      # Recode stray "N"/"Y" entries to "L" while the column is still
      # character; doing it after the factor conversion would cast the column
      # back to character and lose the H/M/L level order.
      support_level = if_else(support_level %in% c("N", "Y"), "L", support_level),
      support_level = fct_relevel(factor(support_level), "H", "M", "L"),
      across(
        c(audio_needs, stage_needs, lighting_needs),
        ~ fct_relevel(factor(.x), "H", "M", "L")
      ),
      across(
        c(projection, video_recording, livestream, poster, program, reception),
        ~ fct_relevel(factor(.x), "Y", "N")
      )
    ) %>%
    mutate(
      across(c(audience_count, days_committed, av_staff, pac_staff), numeric_or_na)
    ) %>%
    # Normalise department and venue labels. Both stay character: the string
    # replacements would undo any earlier factor conversion anyway.
    mutate(
      department = department %>%
        str_replace_all("WCC", "ODOA") %>%
        str_replace_all("MSUC", "MUSC") %>%
        str_replace_all("French Dept|French", "FREN") %>%
        str_replace_all("English", "ENGL") %>%
        # Literal match; a bare "." would match any character.
        str_replace_all(fixed("Pres. Office"), "PRES") %>%
        str_replace_all("History", "HIST") %>%
        # Word boundaries keep an already-correct "THDA" from becoming "THDAA".
        str_replace_all("\\bTHD\\b", "THDA") %>%
        str_replace_all("Inclusion & Equity", "IEC") %>%
        str_replace_all("/", " & ") %>%
        str_replace_all(",", " &") %>%
        str_replace_all("\\s+", " "), # Remove extra spaces
      venue = venue %>%
        str_replace_all("Skinner Chapel", "Chapel") %>%
        str_replace_all(",", " &")
    ) %>%
    # Tolerate exports that do not carry the `wk` column.
    select(-any_of("wk")) %>%
    mutate(department_type = case_when(
      department == "MUSC" ~ "MUSC",
      department == "ODOA" ~ "ODOA",
      department == "CSA" ~ "CSA",
      str_detect(department, "&") ~ "Collab",
      TRUE ~ "Others"
    )) %>%
    # Harmonise event titles and fix known typos.
    mutate(
      what = what %>%
        str_replace_all("Jazz Ensemble Concert|Jazz Area Concert", "Jazz Concert") %>%
        str_replace_all("Symphony Band Concert", "Symphony Concert") %>%
        str_replace_all("Composition Recital", "Composition Showcase Recital") %>%
        str_replace_all("Harpichord", "Harpsichord") %>%
        str_replace_all("Emsemble", "Ensemble") %>%
        str_replace_all("Juest Cellin'", "Just Cellin'") %>%
        str_replace_all("Facutly|FACULTY|Mazariello", "Faculty")
    ) %>%
    # First matching rule wins, so specific patterns must precede broader ones.
    mutate(event_type = case_when(
      str_detect(what, "GUEST|ODOA|Concert Series|SPCO") ~ "Guest / Masterclass",
      # Must come before the plain "Faculty" rule, which would otherwise
      # capture every "Faculty&Guest" title.
      str_detect(what, "Faculty&Guest") ~ "Faculty + Guest",
      str_detect(what, "Faculty") ~ "Faculty Recital",
      str_detect(what, "Student|Senior|Junior|Piano Recital: |Johnson|Verma Jameson") ~ "Student Recital",
      str_detect(what, "Studio Recital|Organ & Harpsichord|Composition Showcase Recital|Chamber Recital|Chamber Music Recital|Chamber Music|Organ Recital|Strings Recital|Violin & Viola|Violin/Viola|Drum Ensemble|Drum Recital|Voice Showcase Recital|Chinese Music Recital|Piano Studios Recital|Jazz Chamber|Piano Recital|Comps Fest|Recorder Recital|Music Ensemble|Studio") ~ "Studio Recital",
      str_detect(what, "Orchestra Concert|Jazz Concert|Symphony Concert|Symphony Band|Choir Concert|Orchestra and Choir|Chinese & Global|Chinese Global Concert|Chinese and Global|Chinese Music Concert|Chinese Music Ensemble|Chinese Ensemble|Music Comps|Jazz Vocal Concert") ~ "Ensemble Concert",
      str_detect(what, csa_pattern) ~ "Student Activity",
      str_detect(what, "Masterclass|Lecture|Symposium") ~ "Guest / Masterclass",
      str_detect(what, "Trustees|Trustee's|Presidents|Conference|President's|Presentation") ~ "Presentation",
      str_detect(what, "Clinic|Music Fest|Music Department Showcase|Melinda Russell|Launch|Event|Opening") ~ "Special Events",
      TRUE ~ "Guest"
    )) %>%
    mutate(
      year = term_to_year(term),
      term_category = case_when(
        str_detect(term, "^F") ~ "Fall",
        str_detect(term, "^W") ~ "Winter",
        str_detect(term, "^S") ~ "Spring"
      ),
      term_category = factor(
        term_category,
        levels = c("Spring", "Winter", "Fall"),
        ordered = TRUE
      ),
      # Order terms as they are listed in the term table.
      term = factor(term, levels = term_start_dates$term, ordered = TRUE)
    ) %>%
    arrange(year, term) %>%
    mutate(week_of_term = calculate_week_of_term(date, term))

  # Events per term, with the academic-year total repeated on each term row.
  event_summary <- combined_data_filtered %>%
    group_by(year, term) %>%
    summarize(term_total = n(), .groups = "drop") %>%
    group_by(year) %>%
    mutate(year_total = sum(term_total)) %>%
    ungroup()

  # Music-department guest events, excluding masterclasses.
  guest_only <- combined_data_filtered %>%
    filter(
      event_type == "Guest / Masterclass",
      str_detect(department, "MUSC")
    ) %>%
    filter(!str_detect(what, "Masterclass"))

  # Get the unique years and prepend the "All" option.
  year_choices <- c("All", unique(combined_data_filtered$year))

  list(
    combined_data_filtered = combined_data_filtered,
    event_summary = event_summary,
    year_choices = year_choices,
    guest_only = guest_only
  )
}
# Run the wrangling step once and expose its pieces as top-level objects.
result_list <- dataWranglingfn(term_list)
combined_data_filtered <- result_list[["combined_data_filtered"]]
event_summary <- result_list[["event_summary"]]
year_choices <- result_list[["year_choices"]]
```


```{r}
# Lookup table joined onto the events by title (`what`); it supplies the
# `genre` and `sponsor` columns.
guest_only_base <- read_csv("data/guest_only_base.csv")
# Term codes and their start dates.
term_list <- read_csv("data/term_list.csv")
# Read every per-term event export, stack them, clean the columns, and attach
# the genre/sponsor annotations from `guest_only_base`.
#
# Args:
#   term_list: data frame with a `term` column (codes whose first character is
#     the season letter F/W/S and whose next two characters are the year,
#     e.g. "F15") and a `start_date` column used to number weeks within a term.
#
# Returns a named list:
#   combined_data_filtered  cleaned event rows with `genre` and `sponsor`
#                           joined on by event title
#   event_summary           event counts per year/term plus per-year totals,
#                           computed before the annotation join
#   year_choices            "All" followed by the distinct academic years
#   guest_only              MUSC "Guest / Masterclass" rows whose title does
#                           not contain "Masterclass"
#
# Reads the globals `data_dir` (location of the CSV exports) and
# `guest_only_base` (annotation table with `what`, `genre`, `sponsor`).
dataWranglingfn <- function(term_list) {
  # Set term_list as term_start_dates
  term_start_dates <- term_list

  # Titles matching this pattern are reassigned to the CSA department.
  csa_pattern <- "CSA|Just Cellin|Lunar New Year|ACA|A Cappella|Accidentals|Exit 69|Date Knight|Knights|Knightingales|International Festival"

  # Keep purely numeric entries and turn everything else into NA.
  numeric_or_na <- function(x) {
    as.numeric(ifelse(grepl("^[0-9]+$", x), x, NA))
  }

  # Read one term's export, normalise the column names, force every column to
  # character and tag each row with its term code.
  read_and_clean_event_file <- function(code) {
    file_path <- paste0(data_dir, "2015-2024 Events Data - ", code, " - Event Data.csv")
    read_csv(file_path) %>%
      clean_names() %>%
      mutate(across(everything(), as.character)) %>%
      mutate(term = code)
  }

  # Convert a term code to its academic-year range: a fall term opens the year
  # ("F15" -> "2015-2016"), any other term closes it ("W16" -> "2015-2016").
  term_to_year <- function(term) {
    year <- as.numeric(str_sub(term, 2, 3))
    start_year <- if_else(str_sub(term, 1, 1) == "F", 2000 + year, 2000 + year - 1)
    paste0(start_year, "-", start_year + 1)
  }

  # Week number of each event within its term, counting Monday-based weeks
  # with the term's starting week as week 1. Vectorised over both arguments;
  # a term with no entry in `term_start_dates` yields NA.
  calculate_week_of_term <- function(event_date, term) {
    term_row <- match(as.character(term), as.character(term_start_dates[["term"]]))
    start_date <- term_start_dates[["start_date"]][term_row]
    event_week <- floor_date(event_date, unit = "week", week_start = 1)
    start_week <- floor_date(start_date, unit = "week", week_start = 1)
    as.integer((event_week - start_week) / 7) + 1
  }

  # Each file carries its own `term` value, so a full join on all shared
  # columns can never match rows across files; stacking is the same result
  # without the join messages.
  combined_data <- term_list$term %>%
    map(read_and_clean_event_file) %>%
    bind_rows()

  # Filter and clean combined data
  combined_data_filtered <- combined_data %>%
    # Drop untitled rows and rehearsals.
    filter(!is.na(what), what != "") %>%
    filter(!what %in% c("Choir & Jazz Rehearsal", "Jazz Rehearsal")) %>%
    mutate(
      date = ymd(date),
      # If 'livestream' is NA, use 'live_stream'.
      livestream = coalesce(livestream, live_stream)
    ) %>%
    select(-live_stream) %>%
    mutate(department = ifelse(str_detect(what, csa_pattern), "CSA", department)) %>%
    mutate(
      # Recode stray "N"/"Y" entries to "L" while the column is still
      # character; doing it after the factor conversion would cast the column
      # back to character and lose the H/M/L level order.
      support_level = if_else(support_level %in% c("N", "Y"), "L", support_level),
      support_level = fct_relevel(factor(support_level), "H", "M", "L"),
      across(
        c(audio_needs, stage_needs, lighting_needs),
        ~ fct_relevel(factor(.x), "H", "M", "L")
      ),
      across(
        c(projection, video_recording, livestream, poster, program, reception),
        ~ fct_relevel(factor(.x), "Y", "N")
      )
    ) %>%
    mutate(
      across(c(audience_count, days_committed, av_staff, pac_staff), numeric_or_na)
    ) %>%
    # Normalise department and venue labels. Both stay character: the string
    # replacements would undo any earlier factor conversion anyway.
    mutate(
      department = department %>%
        str_replace_all("WCC", "ODOA") %>%
        str_replace_all("MSUC", "MUSC") %>%
        str_replace_all("French Dept|French", "FREN") %>%
        str_replace_all("English", "ENGL") %>%
        # Literal match; a bare "." would match any character.
        str_replace_all(fixed("Pres. Office"), "PRES") %>%
        str_replace_all("History", "HIST") %>%
        # Word boundaries keep an already-correct "THDA" from becoming "THDAA".
        str_replace_all("\\bTHD\\b", "THDA") %>%
        str_replace_all("Inclusion & Equity", "IEC") %>%
        str_replace_all("/", " & ") %>%
        str_replace_all(",", " &") %>%
        str_replace_all("\\s+", " "), # Remove extra spaces
      venue = venue %>%
        str_replace_all("Skinner Chapel", "Chapel") %>%
        str_replace_all(",", " &")
    ) %>%
    # Handle department types
    mutate(department_type = case_when(
      department == "MUSC" ~ "MUSC",
      department == "ODOA" ~ "ODOA",
      department == "CSA" ~ "CSA",
      str_detect(department, "&") ~ "Collab",
      TRUE ~ "Others"
    )) %>%
    # Correcting event names
    mutate(what = str_replace_all(what, "Jazz Ensemble Concert|Jazz Area Concert", "Jazz Concert")) %>%
    mutate(
      year = term_to_year(term),
      term_category = case_when(
        str_detect(term, "^F") ~ "Fall",
        str_detect(term, "^W") ~ "Winter",
        str_detect(term, "^S") ~ "Spring"
      ),
      term_category = factor(
        term_category,
        levels = c("Spring", "Winter", "Fall"),
        ordered = TRUE
      ),
      # Order terms as they are listed in the term table.
      term = factor(term, levels = term_start_dates$term, ordered = TRUE)
    ) %>%
    arrange(year, term) %>%
    mutate(week_of_term = calculate_week_of_term(date, term))

  # Events per term, with the academic-year total repeated on each term row.
  # Computed before the annotation join so the counts cannot be inflated by it.
  event_summary <- combined_data_filtered %>%
    group_by(year, term) %>%
    summarize(term_total = n(), .groups = "drop") %>%
    group_by(year) %>%
    mutate(year_total = sum(term_total)) %>%
    ungroup()

  # Collapse exact duplicate annotation rows first: the join key is the title
  # alone, so a repeated (what, genre, sponsor) row would otherwise multiply
  # every event with that title. Titles with genuinely different annotations
  # still match more than once, hence the explicit relationship.
  guest_annotations <- guest_only_base %>%
    select(what, genre, sponsor) %>%
    distinct()
  combined_data_filtered <- combined_data_filtered %>%
    left_join(guest_annotations, by = "what", relationship = "many-to-many")

  # NOTE(review): `event_type` is not derived anywhere in this function, so it
  # presumably has to be a column of the CSV exports already — confirm. Fail
  # with an explicit message rather than dplyr's "object not found".
  if (!"event_type" %in% names(combined_data_filtered)) {
    stop(
      "`event_type` column not found in the event data; ",
      "it is required to build `guest_only`.",
      call. = FALSE
    )
  }

  # Music-department guest events, excluding masterclasses.
  guest_only <- combined_data_filtered %>%
    filter(
      event_type == "Guest / Masterclass",
      str_detect(department, "MUSC")
    ) %>%
    filter(!str_detect(what, "Masterclass"))

  # Get the unique years and prepend the "All" option.
  year_choices <- c("All", unique(combined_data_filtered$year))

  list(
    combined_data_filtered = combined_data_filtered,
    event_summary = event_summary,
    year_choices = year_choices,
    guest_only = guest_only
  )
}
# Run the wrangling step; the returned list is not assigned here, so it is
# only printed.
dataWranglingfn(term_list = term_list)
```

Loading

0 comments on commit 9107c9c

Please sign in to comment.