-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Kun-Wu Lyu
authored and
Kun-Wu Lyu
committed
Aug 14, 2024
0 parents
commit b6c0286
Showing
66 changed files
with
2,222 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.Rproj.user | ||
.Rhistory | ||
.RData | ||
.Ruserdata |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,343 @@ | ||
--- | ||
title: "Event Data Analysis" | ||
author: "Kunwu Lyu" | ||
date: "2024-06-26" | ||
output: pdf_document | ||
--- | ||
|
||
```{r setup, include=FALSE} | ||
# Document-wide chunk defaults: echo the code, silence warnings and messages,
# and keep knitting when a chunk errors (error = TRUE) so the failure is
# reported in the rendered output.
knitr::opts_chunk$set(
  echo = TRUE,
  error = TRUE,
  warning = FALSE,
  message = FALSE,
  collapse = TRUE,
  comment = NA,
  size = "small"
)
``` | ||
|
||
|
||
## Loading Packages | ||
```{r} | ||
library(dplyr) | ||
library(tidyverse) | ||
library(stringr) | ||
library(lubridate) | ||
library(tidyr) | ||
library(forcats) | ||
library(ggplot2) | ||
library(viridis) | ||
library(viridisLite) | ||
library(purrr) | ||
library(gganimate) | ||
library(ggthemes) | ||
library(leaflet) | ||
library(patchwork) | ||
library(readr) | ||
library(janitor) | ||
library(plotly) | ||
``` | ||
|
||
|
||
## Data Wrangling | ||
|
||
```{r} | ||
# Folder holding the per-term CSV exports (trailing slash kept: paths are
# built with paste0()).
data_dir <- "data/"
# Term codes in chronological order: F15 first, then W/S/F for years 16-24,
# with the final F24 dropped so the sequence ends at S24.
file_codes <- c(
  "F15",
  head(paste0(c("W", "S", "F"), rep(16:24, each = 3)), -1)
)
# Load one term's event sheet and normalise it so sheets can be stacked.
# `code` is a term code such as "F15"; the CSV is looked up under `data_dir`.
# Returns the sheet with janitor-cleaned column names, every column coerced to
# character, and a `term` column set to `code`.
read_and_clean_event_file <- function(code) {
  csv_name <- paste0("2015-2024 Events Data - ", code, " - Event Data.csv")
  raw_sheet <- read_csv(paste0(data_dir, csv_name))
  raw_sheet %>%
    clean_names() %>%
    mutate(across(everything(), as.character), term = code)
}
# Map term codes to academic-year labels.
# A code is a season letter followed by a two-digit year, e.g. "W16".
# A fall term starts the academic year named after it; any other term belongs
# to the year that started the previous fall, so "F15", "W16" and "S16" all
# map to "2015-2016". Vectorised over `term`.
term_to_year <- function(term) {
  two_digit <- as.numeric(substr(term, 2, 3))
  is_fall <- substr(term, 1, 1) == "F"
  # Non-fall terms are shifted back one calendar year.
  first_year <- 2000 + two_digit - as.numeric(!is_fall)
  paste0(first_year, "-", first_year + 1)
}
# Read every term's sheet into a list named by its term code.
event_data_list <- file_codes %>%
  set_names() %>%
  map(read_and_clean_event_file)
# Stack the per-term sheets. Each sheet carries its own `term` value, so no
# rows can ever match between sheets: bind_rows() yields the same rows as a
# chain of natural full joins, without the join messages and without relying
# on `term` happening to be among the join keys. Columns missing from a sheet
# are filled with NA.
combined_data <- bind_rows(event_data_list)
# Clean the stacked event sheets and derive the columns the plots rely on:
# department_type, event_type, year, an ordered term, and term_category.
combined_data_filtered <- combined_data %>%
  # Keep named events only, and leave rehearsals out.
  filter(!is.na(what), what != "") %>%
  filter(!what %in% c("Choir & Jazz Rehearsal", "Jazz Rehearsal")) %>%
  mutate(
    date = ymd(date), # ymd() already returns a Date
    # "N" and "Y" are recoded to "L"; every other value (including NA) is kept.
    support_level = if_else(support_level %in% c("N", "Y"), "L", support_level)
  ) %>%
  mutate(
    # Normalise department spellings. With a named vector, str_replace_all()
    # applies the replacements one after another in the order listed.
    department = str_replace_all(department, c(
      "WCC" = "ODOA",
      "MSUC" = "MUSC",
      "French Dept|French" = "FREN",
      "English" = "ENGL",
      "Pres. Office" = "PRES",
      "History" = "HIST",
      # Word boundaries keep an already-correct "THDA" from becoming "THDAA".
      "\\bTHD\\b" = "THDA",
      "Inclusion & Equity" = "IEC",
      "/" = " & ",
      "," = " &",
      "\\s+" = " " # collapse runs of whitespace
    )),
    venue = str_replace_all(venue, c("Skinner Chapel" = "Chapel", "," = " &"))
  ) %>%
  select(-wk) %>%
  mutate(
    # Single-department codes keep their own bucket; anything containing "&"
    # is a collaboration; everything else (including NA) is "Others".
    department_type = case_when(
      department %in% c("MUSC", "ODOA", "CSA") ~ department,
      str_detect(department, "&") ~ "Collab",
      TRUE ~ "Others"
    )
  ) %>%
  mutate(
    # Fix typos and merge naming variants so the event_type patterns below match.
    what = str_replace_all(what, c(
      "Jazz Ensemble Concert|Jazz Area Concert" = "Jazz Concert",
      "Symphony Band Concert" = "Symphony Concert",
      "Composition Recital" = "Composition Showcase Recital",
      "Harpichord" = "Harpsichord",
      "Emsemble" = "Ensemble",
      "Juest Cellin'" = "Just Cellin'",
      "Facutly|FACULTY|Mazariello" = "Faculty"
    ))
  ) %>%
  mutate(
    # First matching pattern wins, so the order of these rules matters.
    # NOTE(review): "Chinese Music Ensemble" in the Ensemble Concert rule can
    # never match, because "Music Ensemble" in the Studio Recital rule catches
    # it first - confirm which bucket is intended.
    event_type = case_when(
      str_detect(what, "GUEST|ODOA|Concert Series|SPCO") ~ "Guest",
      str_detect(what, "Faculty") ~ "Faculty Recital",
      str_detect(what, "Student|Senior|Junior|Piano Recital: |Johnson|Verma Jameson") ~ "Student Recital",
      str_detect(what, "Studio Recital|Organ & Harpsichord|Composition Showcase Recital|Chamber Recital|Chamber Music Recital|Chamber Music|Organ Recital|Strings Recital|Violin & Viola|Violin/Viola|Drum Ensemble|Drum Recital|Voice Showcase Recital|Chinese Music Recital|Piano Studios Recital|Jazz Chamber|Piano Recital|Comps Fest|Recorder Recital|Music Ensemble") ~ "Studio Recital",
      str_detect(what, "Orchestra Concert|Jazz Concert|Symphony Concert|Symphony Band|Choir Concert|Orchestra and Choir|Chinese & Global|Chinese Global Concert|Chinese and Global|Chinese Music Concert|Chinese Music Ensemble|Chinese Ensemble|Music Comps|Jazz Vocal Concert") ~ "Ensemble Concert",
      str_detect(what, "CSA|Just Cellin|Lunar New Year|ACA|A Cappella|Accidentals|Exit 69|Date Knight|Knights|Knightingales|International Festival") ~ "Student Activity",
      str_detect(what, "Masterclass|Lecture|Symposium") ~ "Masterclass",
      str_detect(what, "Trustees|Trustee's|Presidents|Conference|President's|Presentation") ~ "Presentation",
      str_detect(what, "Clinic|Music Fest|Music Department Showcase|Melinda Russell|Launch|Event|Opening") ~ "Special Events",
      TRUE ~ "Guest"
    )
  ) %>%
  mutate(
    year = term_to_year(term),
    # file_codes is already in chronological order, so it doubles as the
    # level order for the term factor.
    term = factor(term, levels = file_codes, ordered = TRUE)
  ) %>%
  arrange(year, term) %>%
  mutate(
    term_category = case_when(
      str_detect(term, "^F") ~ "Fall",
      str_detect(term, "^W") ~ "Winter",
      str_detect(term, "^S") ~ "Spring"
    ),
    # NOTE(review): this level order (Spring, Winter, Fall) is the reverse of
    # the Fall, Winter, Spring order used by the plotting chunks, which
    # re-level the column before plotting - confirm it is intentional.
    term_category = factor(term_category, levels = c("Spring", "Winter", "Fall"), ordered = TRUE)
  )
# Quick structural check of the cleaned data.
glimpse(combined_data_filtered)
# Number of events per term, plus the total for the academic year that term
# belongs to (repeated on each of that year's rows).
event_summary <- combined_data_filtered %>%
  count(year, term, name = "term_total") %>%
  add_count(year, wt = term_total, name = "year_total")
``` | ||
|
||
|
||
|
||
## EDA Plots | ||
|
||
### Overall Event Summary | ||
```{r} | ||
# Order the summary chronologically and label every term with its season.
event_summary <- event_summary %>%
  # Reuse the chronological term codes instead of repeating the level list.
  mutate(term = factor(term, levels = file_codes, ordered = TRUE)) %>%
  arrange(year, term) %>%
  mutate(
    term_category = case_when(
      str_detect(term, "^F") ~ "Fall",
      str_detect(term, "^W") ~ "Winter",
      str_detect(term, "^S") ~ "Spring"
    ),
    term_category = factor(
      term_category,
      levels = c("Fall", "Winter", "Spring"),
      ordered = TRUE
    )
  )
# Stacked bar chart: one bar per academic year, one segment per term, each
# segment labelled with its event count.
ggplot(event_summary, aes(x = year, y = term_total, fill = term_category)) +
  geom_col() + # same as geom_bar(stat = "identity")
  geom_text(
    aes(label = term_total),
    position = position_stack(vjust = 0.5),
    size = 3,
    color = "black"
  ) +
  scale_fill_manual(values = c("Fall" = "#FF9999", "Winter" = "#99CCFF", "Spring" = "#99FF99")) +
  labs(x = "Year", y = "Total Events", fill = "Term") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
``` | ||
|
||
|
||
### Breakdown of Events by Support Level | ||
```{r} | ||
library(dplyr) | ||
library(tidyr) | ||
library(purrr) | ||
library(ggplot2) | ||
# Build the support-level table for one academic year: one row per term
# category, one count column per support level (0 where a level did not occur
# in a term), plus a `year` column identifying the table.
# Reads the global `combined_data_filtered`.
summarize_and_pivot <- function(current_year) {
  level_counts <- combined_data_filtered %>%
    filter(year == current_year) %>%
    count(term_category, support_level, name = "Support")
  level_counts %>%
    pivot_wider(
      names_from = support_level,
      values_from = Support,
      values_fill = list(Support = 0)
    ) %>%
    mutate(year = current_year)
}
# Academic years present in the cleaned data.
years <- unique(combined_data_filtered$year)
# One wide support table per year, stacked into a single data frame. A support
# level that never occurs in a given year has no column in that year's table,
# so its cells are NA after stacking.
support_tables <- map(years, summarize_and_pivot) %>%
  bind_rows()
# Back to long form for ggplot: one row per year / term category / level.
support_tables_melted <- support_tables %>%
  pivot_longer(
    cols = -c(term_category, year),
    names_to = "support_level",
    values_to = "Support"
  )
# Drop the column created for missing support levels (named "NA" by
# pivot_wider) and fix the display order of terms and levels.
support_tables_melted <- support_tables_melted %>%
  filter(support_level != "NA") %>%
  mutate(
    term_category = factor(term_category, levels = c("Fall", "Winter", "Spring"), ordered = TRUE),
    support_level = factor(support_level, levels = c("NA", "H", "M", "L"), ordered = TRUE)
  )
# Share of each support level within its year/term. na.rm = TRUE keeps one
# missing level from turning the whole group's total, and therefore every
# percentage in it, into NA.
support_tables_melted <- support_tables_melted %>%
  group_by(year, term_category) %>%
  mutate(
    total_support = sum(Support, na.rm = TRUE),
    percentage = (Support / total_support) * 100
  ) %>%
  ungroup()
# Facet plot: one panel per academic year, stacked by support level.
ggplot(support_tables_melted, aes(x = term_category, y = Support, fill = support_level)) +
  geom_col() + # stacked by default, same as geom_bar(stat = "identity")
  geom_text(
    aes(label = paste0(Support, " (", round(percentage, 1), "%)")),
    position = position_stack(vjust = 0.5), size = 2, color = "black"
  ) +
  facet_wrap(~ year) +
  labs(x = "Term", y = "Support Count", fill = "Support Level") +
  scale_fill_manual(values = c("H" = "#FF9999", "M" = "#99CCFF", "L" = "#99FF99", "NA" = "black")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
``` | ||
|
||
|
||
### Breakdown of Events by Department/Source | ||
|
||
```{r} | ||
# Build the department-type table for one academic year: one row per term
# category, one count column per department type (0 where a type did not occur
# in a term), plus a `year` column identifying the table.
# Reads the global `combined_data_filtered`.
summarize_and_pivot_department <- function(current_year) {
  type_counts <- combined_data_filtered %>%
    filter(year == current_year) %>%
    count(term_category, department_type, name = "DepartmentCount")
  type_counts %>%
    pivot_wider(
      names_from = department_type,
      values_from = DepartmentCount,
      values_fill = list(DepartmentCount = 0)
    ) %>%
    mutate(year = current_year)
}
# Academic years present in the cleaned data.
years <- unique(combined_data_filtered$year)
# One wide department table per year, stacked into a single data frame.
department_tables <- map(years, summarize_and_pivot_department) %>%
  bind_rows()
# Back to long form for ggplot: one row per year / term category / type.
department_tables_melted <- department_tables %>%
  pivot_longer(
    cols = -c(term_category, year),
    names_to = "department_type",
    values_to = "DepartmentCount"
  )
# Fix the display order of terms and department types.
department_tables_melted <- department_tables_melted %>%
  mutate(
    term_category = factor(term_category, levels = c("Fall", "Winter", "Spring"), ordered = TRUE),
    department_type = factor(department_type, levels = c("MUSC", "ODOA", "CSA", "Collab", "Others"), ordered = TRUE)
  )
# Drop rows whose department type is not one of the levels above (such values
# become NA when converted to a factor).
department_tables_melted <- department_tables_melted %>%
  filter(!is.na(department_type))
# Share of each department type within its year/term; ungroup afterwards so
# later steps do not inherit the grouping.
department_tables_melted <- department_tables_melted %>%
  group_by(year, term_category) %>%
  mutate(
    total_count = sum(DepartmentCount, na.rm = TRUE),
    percentage = (DepartmentCount / total_count) * 100
  ) %>%
  ungroup()
# Facet plot: one panel per academic year, stacked by department type.
ggplot(department_tables_melted, aes(x = term_category, y = DepartmentCount, fill = department_type)) +
  geom_col() + # stacked by default, same as geom_bar(stat = "identity")
  geom_text(
    aes(label = paste0(DepartmentCount, " (", round(percentage, 1), "%)")),
    position = position_stack(vjust = 0.5), size = 2, color = "black",
    check_overlap = TRUE
  ) +
  facet_wrap(~ year) +
  labs(x = "Term", y = "Department Count", fill = "Department Type") +
  scale_fill_manual(values = c("MUSC" = "#FF9999", "ODOA" = "#99CCFF", "CSA" = "#99FF99", "Collab" = "#FFD700", "Others" = "#FFA500")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
``` | ||
|
||
|
||
### Breakdown of Music, Collab & ODOA Events by Type | ||
```{r} | ||
# Build the event-type table for one academic year: one row per term category,
# one count column per event type (0 where a type did not occur in a term),
# plus a `year` column identifying the table.
# Reads the global `combined_data_filtered`.
summarize_and_pivot_event <- function(current_year) {
  type_counts <- combined_data_filtered %>%
    filter(year == current_year) %>%
    count(term_category, event_type, name = "EventCount")
  type_counts %>%
    pivot_wider(
      names_from = event_type,
      values_from = EventCount,
      values_fill = list(EventCount = 0)
    ) %>%
    mutate(year = current_year)
}
# NOTE(review): the section heading mentions Music, Collab & ODOA events, but
# no department filter is applied here, so every department is counted -
# confirm which is intended.
# Academic years present in the cleaned data.
years <- unique(combined_data_filtered$year)
# One wide event-type table per year, stacked into a single data frame.
event_tables <- map(years, summarize_and_pivot_event) %>%
  bind_rows()
# Back to long form for ggplot: one row per year / term category / event type.
event_tables_melted <- event_tables %>%
  pivot_longer(
    cols = -c(term_category, year),
    names_to = "event_type",
    values_to = "EventCount"
  )
# Fix the display order of terms and event types.
event_tables_melted <- event_tables_melted %>%
  mutate(
    term_category = factor(term_category, levels = c("Fall", "Winter", "Spring"), ordered = TRUE),
    event_type = factor(
      event_type,
      levels = c(
        "Ensemble Concert", "Student Activity", "Studio Recital",
        "Guest", "Faculty Recital", "Student Recital",
        "Special Events", "Presentation", "Masterclass"
      ),
      ordered = TRUE
    )
  )
# Share of each event type within its year/term; ungroup afterwards so later
# steps do not inherit the grouping.
event_tables_melted <- event_tables_melted %>%
  group_by(year, term_category) %>%
  mutate(
    total_count = sum(EventCount, na.rm = TRUE),
    percentage = (EventCount / total_count) * 100
  ) %>%
  ungroup()
# Facet plot: one panel per academic year, stacked by event type.
ggplot(event_tables_melted, aes(x = term_category, y = EventCount, fill = event_type)) +
  geom_col() + # stacked by default, same as geom_bar(stat = "identity")
  geom_text(
    aes(label = paste0(EventCount, " (", round(percentage, 1), "%)")),
    position = position_stack(vjust = 0.5), size = 2, color = "black",
    check_overlap = TRUE
  ) +
  facet_wrap(~ year) +
  labs(x = "Term", y = "Event Count", fill = "Event Type") +
  # There are nine event types. "Set2" only provides eight colours, which
  # leaves the ninth type with the NA fill; "Set3" provides up to twelve.
  scale_fill_brewer(palette = "Set3") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
``` | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
Version: 1.0 | ||
|
||
RestoreWorkspace: Default | ||
SaveWorkspace: Default | ||
AlwaysSaveHistory: Default | ||
|
||
EnableCodeIndexing: Yes | ||
UseSpacesForTab: Yes | ||
NumSpacesForTab: 2 | ||
Encoding: UTF-8 | ||
|
||
RnwWeave: Sweave | ||
LaTeX: pdfLaTeX |
Oops, something went wrong.