Skip to content

Commit

Permalink
eda with regression
Browse files Browse the repository at this point in the history
  • Loading branch information
kwlyu committed Oct 2, 2024
1 parent 40538f2 commit 1bc39aa
Show file tree
Hide file tree
Showing 2 changed files with 1,726 additions and 0 deletions.
226 changes: 226 additions & 0 deletions Event-Data-Analysis.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -1024,3 +1024,229 @@ dataWranglingfn <- function(term_list) {
dataWranglingfn(term_list)
```

```{r}
# Load A BUNCH of packages
library(shiny)
library(shinydashboard)
library(shinydashboardPlus)
library(dplyr)
library(tidyverse)
library(reactable)
library(reactablefmtr)
library(shinycssloaders)
library(lubridate)
library(ggplot2)
library(shinyjs)
library(viridis)
library(viridisLite)
library(purrr)
library(stringr)
library(forcats)
library(gganimate)
library(ggthemes)
library(leaflet)
library(patchwork)
library(probably)
library(readr)
library(janitor)
library(shinyalert)
library(shinyWidgets)
library(tidyr)
library(tidymodels)
library(yardstick)
library(plotly)
library(hardhat)
library(vip)
library(rpart.plot)
library(ranger)
library(broom)
library(googlesheets4)
library(DT)
library(sodium)
library(formattable)
library(shinyfullscreen)
# dataui is only distributed via GitHub. Install it once when it is missing
# rather than on every knit: an unconditional install is slow and makes the
# document fail whenever there is no network access.
if (!requireNamespace("dataui", quietly = TRUE)) {
  remotes::install_github("timelyportfolio/dataui")
}
# Authenticate googlesheets4, caching the token under ".secrets".
# NOTE(review): the email value looks like a redaction placeholder - confirm
# the real account before running.
gs4_auth(email = "[email protected]", cache = ".secrets")
## Local version
# Directory containing the per-term event files and the lookup tables.
# Keep the trailing slash: paths below are built with paste0().
data_dir <- "data/"
# One row per term; dataWranglingfn() reads its `term` and `start_date` columns.
term_list <- read_csv(paste0(data_dir, "term_list.csv"))
# Lookup table joined onto the events inside dataWranglingfn() to add
# `genre` and `sponsor`.
guest_only_base <- read_csv(paste0(data_dir, "guest_only_base.csv"))
# Build the cleaned event data set from the per-term CSV exports.
#
# Reads one "2015-2024 Events Data - <term> - Event Data.csv" file per entry of
# `term_list$term`, stacks them, normalises department / venue / event names,
# derives `department_type`, `event_type`, `year`, `term_category` and
# `week_of_term`, and joins `genre` / `sponsor` from `guest_only_base`.
#
# Args:
#   term_list: data frame with a `term` column (codes whose first character is
#     the season letter F/W/S and whose characters 2-3 are the two-digit year)
#     and a `start_date` column holding the first day of each term.
#
# Uses `data_dir` and `guest_only_base` from the enclosing environment.
#
# Returns:
#   A named list with
#     combined_data_filtered: cleaned events with `genre` / `sponsor` joined on,
#     event_summary: event counts per year and term plus the yearly total,
#     year_choices: "All" followed by the distinct academic years,
#     guest_only: MUSC "Guest / Masterclass" events whose title does not
#       contain "Masterclass".
dataWranglingfn <- function(term_list) {
  # Title keywords that mark student-activity (CSA) events; used both for the
  # department override and for the event_type classification.
  student_activity_pattern <- "CSA|Just Cellin|Lunar New Year|ACA|A Cappella|Accidentals|Exit 69|Date Knight|Knights|Knightingales|International Festival"

  # Read the export for one term code, clean the column names, force every
  # column to character (so files with differing guessed types can be stacked)
  # and tag the rows with the term code.
  read_and_clean_event_file <- function(code) {
    file_path <- paste0(data_dir, "2015-2024 Events Data - ", code, " - Event Data.csv")
    data <- read_csv(file_path)
    clean_names(data) %>%
      mutate(across(everything(), as.character)) %>%
      mutate(term = code)
  }

  # Map a term code to its academic year label "YYYY-YYYY": fall terms start
  # the academic year, every other season belongs to the year that started the
  # previous calendar year.
  term_to_year <- function(term) {
    year <- as.numeric(str_sub(term, 2, 3))
    season <- str_sub(term, 1, 1)
    start_year <- if_else(season == "F", 2000 + year, 2000 + year - 1)
    end_year <- start_year + 1
    paste0(start_year, "-", end_year)
  }

  # Week number within the term (1 = the week containing the term start date),
  # with weeks running Monday to Sunday. Vectorised over rows: the term start
  # dates are looked up with match(), so unknown terms yield NA.
  calculate_week_of_term <- function(event_date, term) {
    start_date <- term_list$start_date[match(as.character(term), term_list$term)]
    week_gap <- floor_date(event_date, unit = "week", week_start = 1) -
      floor_date(start_date, unit = "week", week_start = 1)
    as.integer(week_gap / 7) + 1
  }

  # Keep purely numeric strings and turn everything else into NA.
  parse_count <- function(x) {
    as.numeric(ifelse(grepl("^[0-9]+$", x), x, NA))
  }

  # Read every term file and stack the rows. Each file carries its own `term`
  # value, so stacking is all that is needed (no key-based join).
  event_data_list <- set_names(map(term_list$term, read_and_clean_event_file), term_list$term)
  combined_data <- bind_rows(event_data_list)

  combined_data_filtered <- combined_data %>%
    filter(!is.na(what), what != "") %>%
    filter(what != "Choir & Jazz Rehearsal") %>%
    filter(what != "Jazz Rehearsal") %>%
    mutate(date = as.Date(ymd(date))) %>%
    # If 'livestream' is NA, use 'live_stream'
    mutate(livestream = coalesce(livestream, live_stream)) %>%
    select(-live_stream) %>%
    mutate(department = ifelse(str_detect(what, student_activity_pattern), "CSA", department)) %>%
    # Fold stray "N"/"Y" support levels into "L" while the column is still
    # character, so the factor built below keeps its H/M/L level order.
    mutate(support_level = if_else(support_level %in% c("N", "Y"), "L", support_level)) %>%
    mutate(
      across(
        c(support_level, audio_needs, stage_needs, lighting_needs),
        function(x) fct_relevel(factor(x), "H", "M", "L")
      ),
      across(
        c(projection, video_recording, livestream, poster, program, reception),
        function(x) fct_relevel(factor(x), "Y", "N")
      )
    ) %>%
    mutate(across(c(audience_count, days_committed, av_staff, pac_staff), parse_count)) %>%
    # `department` and `venue` stay character: they are cleaned with string
    # replacements here and used as join keys further down.
    mutate(
      department = str_replace_all(department, "WCC", "ODOA"),
      department = str_replace_all(department, "MSUC", "MUSC"),
      department = str_replace_all(department, "French Dept|French", "FREN"),
      department = str_replace_all(department, "English", "ENGL"),
      department = str_replace_all(department, "Pres\\. Office", "PRES"),
      department = str_replace_all(department, "History", "HIST"),
      # Look-ahead keeps values that already read "THDA" from becoming "THDAA".
      department = str_replace_all(department, "THD(?!A)", "THDA"),
      department = str_replace_all(department, "Inclusion & Equity", "IEC"),
      venue = str_replace_all(venue, "Skinner Chapel", "Chapel"),
      department = str_replace_all(department, "/", " & "),
      department = str_replace_all(department, ",", " &"),
      venue = str_replace_all(venue, ",", " &"),
      department = str_replace_all(department, "\\s+", " ") # Remove extra spaces
    ) %>%
    select(-any_of("wk")) %>%
    mutate(department_type = case_when(
      department == "MUSC" ~ "MUSC",
      department == "ODOA" ~ "ODOA",
      department == "CSA" ~ "CSA",
      str_detect(department, "&") ~ "Collab",
      TRUE ~ "Others"
    )) %>%
    # Harmonise event titles (spelling fixes and naming variants) before
    # classifying them.
    mutate(
      what = str_replace_all(what, "Jazz Ensemble Concert|Jazz Area Concert", "Jazz Concert"),
      what = str_replace_all(what, "Symphony Band Concert", "Symphony Concert"),
      what = str_replace_all(what, "Composition Recital", "Composition Showcase Recital"),
      what = str_replace_all(what, "Harpichord", "Harpsichord"),
      what = str_replace_all(what, "Emsemble", "Ensemble"),
      what = str_replace_all(what, "Juest Cellin'", "Just Cellin'"),
      what = str_replace_all(what, "Facutly|FACULTY|Mazariello", "Faculty")
    ) %>%
    # First matching rule wins, so the order of the rules matters.
    mutate(event_type = case_when(
      str_detect(what, "GUEST|ODOA|Concert Series|SPCO") ~ "Guest / Masterclass",
      str_detect(what, "Faculty") ~ "Faculty Recital",
      str_detect(what, "Student|Senior|Junior|Piano Recital: |Johnson|Verma Jameson") ~ "Student Recital",
      str_detect(what, "Studio Recital|Organ & Harpsichord|Composition Showcase Recital|Chamber Recital|Chamber Music Recital|Chamber Music|Organ Recital|Strings Recital|Violin & Viola|Violin/Viola|Drum Ensemble|Drum Recital|Voice Showcase Recital|Chinese Music Recital|Piano Studios Recital|Jazz Chamber|Piano Recital|Comps Fest|Recorder Recital|Music Ensemble|Studio") ~ "Studio Recital",
      str_detect(what, "Orchestra Concert|Jazz Concert|Symphony Concert|Symphony Band|Choir Concert|Orchestra and Choir|Chinese & Global|Chinese Global Concert|Chinese and Global|Chinese Music Concert|Chinese Music Ensemble|Chinese Ensemble|Music Comps|Jazz Vocal Concert") ~ "Ensemble Concert",
      str_detect(what, student_activity_pattern) ~ "Student Activity",
      str_detect(what, "Masterclass|Lecture|Symposium") ~ "Guest / Masterclass",
      str_detect(what, "Trustees|Trustee's|Presidents|Conference|President's|Presentation") ~ "Presentation",
      str_detect(what, "Clinic|Music Fest|Music Department Showcase|Melinda Russell|Launch|Event|Opening") ~ "Special Events",
      TRUE ~ "Guest / Masterclass"
    )) %>%
    mutate(event_type = case_when(
      str_detect(what, "Faculty&Guest") ~ "Faculty + Guest",
      TRUE ~ event_type # Keep existing values for other cases
    )) %>%
    mutate(year = term_to_year(term)) %>%
    mutate(term = factor(term, levels = term_list$term, ordered = TRUE)) %>%
    arrange(year, term) %>%
    mutate(
      term_category = case_when(
        str_detect(term, "^F") ~ "Fall",
        str_detect(term, "^W") ~ "Winter",
        str_detect(term, "^S") ~ "Spring"
      )
    ) %>%
    mutate(term_category = factor(term_category, levels = c("Spring", "Winter", "Fall"), ordered = TRUE)) %>%
    mutate(week_of_term = calculate_week_of_term(date, term))

  # Events per term, plus the total for the academic year the term belongs to.
  event_summary <- combined_data_filtered %>%
    group_by(year, term) %>%
    summarize(term_total = n(), .groups = "drop") %>%
    group_by(year) %>%
    mutate(year_total = sum(term_total)) %>%
    ungroup()

  # Attach genre / sponsor from the lookup table.
  # NOTE(review): `term` is an ordered factor on the left-hand side while
  # guest_only_base$term comes straight from a CSV - confirm the joined `term`
  # column has the type downstream code expects.
  combined_data_filtered_updated <- combined_data_filtered %>%
    left_join(
      guest_only_base %>% select(venue, date, what, department, term, genre, sponsor),
      by = c("venue", "date", "what", "department", "term")
    )

  guest_only <- combined_data_filtered_updated %>%
    filter(
      event_type == "Guest / Masterclass",
      str_detect(department, "MUSC")
    ) %>%
    filter(!str_detect(what, "Masterclass"))

  # Get the unique years and append the "All" option
  year_choices <- c("All", combined_data_filtered_updated %>% pull(year) %>% unique())

  list(
    combined_data_filtered = combined_data_filtered_updated,
    event_summary = event_summary,
    year_choices = year_choices,
    guest_only = guest_only
  )
}
# Run the wrangling pipeline and keep the whole result list for inspection.
test1 <- dataWranglingfn(term_list)

# The model needs complete attendance and staffing counts, so drop events
# where any of the three is missing.
combined_data_filtered_cleaned <- drop_na(
  test1$combined_data_filtered,
  audience_count, av_staff, pac_staff
)

# Audience size explained by support level plus AV staff, PAC staff and their
# interaction.
concert_lm <- lm(
  audience_count ~ support_level + av_staff * pac_staff,
  data = combined_data_filtered_cleaned
)
summary(concert_lm)
```

Loading

0 comments on commit 1bc39aa

Please sign in to comment.