From fd4e47bd29debea44a5ad0c3fbbab2896b96df0c Mon Sep 17 00:00:00 2001
From: Aurelien Dugourd
Date: Tue, 3 Dec 2024 11:18:14 +0000
Subject: [PATCH] some test from scratch and visual enhancement to existing

---
Reviewer amendments (aurelien_sciwheel_stats_process.R only; all other hunks
are unchanged): library() calls moved to the top, sapply()/scalar ifelse()
replaced by type-stable helpers, date conversion pinned to UTC, report year
and input path named once, repeated plot code factored into a helper.
The blob id on that file's index line was dropped because the content changed.

 .gitignore                        |   7 ++
 F1000_Slack.Rproj                 |  17 +++++
 aurelien_sciwheel_stats_process.R | 107 +++++++++++++++++++++++++++++++
 sciwheel_stats_process.R          |   6 +-
 4 files changed, 134 insertions(+), 3 deletions(-)
 create mode 100644 F1000_Slack.Rproj
 create mode 100644 aurelien_sciwheel_stats_process.R

diff --git a/.gitignore b/.gitignore
index 53d0fcb..a0195ff 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,6 +19,7 @@
 
 # RStudio files
 .Rproj.user/
+.Rbuildignore
 
 # produced vignettes
 vignettes/*.html
@@ -38,3 +39,9 @@ vignettes/*.pdf
 # R Environment Variables
 .Renviron
 *.rdata
+
+# data object
+Sciwheel_update/**
+
+# plots
+overviewplots**
diff --git a/F1000_Slack.Rproj b/F1000_Slack.Rproj
new file mode 100644
index 0000000..21a4da0
--- /dev/null
+++ b/F1000_Slack.Rproj
@@ -0,0 +1,17 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
diff --git a/aurelien_sciwheel_stats_process.R b/aurelien_sciwheel_stats_process.R
new file mode 100644
--- /dev/null
+++ b/aurelien_sciwheel_stats_process.R
@@ -0,0 +1,107 @@
+library(dplyr)
+library(ggplot2)
+
+# Input: an RDS dump holding a list of items; each item is expected to have a
+# `content` list whose entries carry `f1000AddedBy` and `f1000AddedDate`.
+data_file <- "data_dump.rds"
+# Year used for the per-year summary at the end of the script.
+report_year <- 2024
+
+if (!file.exists(data_file)) {
+  stop("Input file not found: ", data_file, call. = FALSE)
+}
+nested_list <- readRDS(data_file)
+
+# Collect the "f1000AddedBy" value of every entry in `list_item$content`.
+# Entries without the field are skipped; the result is always a plain vector
+# (NULL when nothing is found) instead of sapply()'s input-dependent shape.
+extract_added_by <- function(list_item) {
+  unlist(
+    lapply(list_item$content, function(entry) entry$f1000AddedBy),
+    use.names = FALSE
+  )
+}
+
+# First element of `x`, or NA when `x` is NULL or empty, so that every field
+# contributes exactly one value per record.
+first_or_na <- function(x) {
+  if (length(x) == 0L) NA else x[[1L]]
+}
+
+# Build a data frame with one row per entry of `list_item$content` and the
+# columns f1000AddedBy / f1000AddedDate (NA where a field is missing).
+# An item without content yields a single all-NA placeholder row.
+extract_added_info <- function(list_item) {
+  contents <- list_item$content
+  if (length(contents) == 0L) {
+    return(data.frame(f1000AddedBy = NA, f1000AddedDate = NA))
+  }
+  do.call(rbind, lapply(contents, function(entry) {
+    data.frame(
+      f1000AddedBy = first_or_na(entry$f1000AddedBy),
+      f1000AddedDate = first_or_na(entry$f1000AddedDate)
+    )
+  }))
+}
+
+# Horizontal bar chart of `Count` per `Member`, bars ordered by count.
+plot_member_counts <- function(counts, title) {
+  ggplot(counts, aes(x = reorder(Member, Count), y = Count)) +
+    geom_col() +
+    labs(title = title, x = "Lab Member", y = "Number of Papers") +
+    theme_minimal() +
+    coord_flip()
+}
+
+# All-time counts --------------------------------------------------------------
+
+# Flatten the "f1000AddedBy" values of all items into one vector
+added_by_list <- unlist(
+  lapply(nested_list, extract_added_by),
+  use.names = FALSE
+)
+
+# Count occurrences of each lab member
+added_by_count <- as.data.frame(table(added_by_list)) %>%
+  rename(Member = added_by_list, Count = Freq)
+
+# Vertical bars, members in factor-level order
+ggplot(added_by_count, aes(x = Member, y = Count)) +
+  geom_col() +
+  labs(title = "Number of Papers Added by Each Lab Member",
+       x = "Lab Member",
+       y = "Number of Papers") +
+  theme_minimal()
+
+# Sort the table in descending order of counts (the plot below orders its
+# bars through reorder(), independently of the row order)
+added_by_count <- added_by_count %>%
+  arrange(desc(Count))
+
+plot_member_counts(added_by_count, "Number of Papers Added by Each Lab Member")
+
+# Counts for `report_year` -----------------------------------------------------
+
+# Flatten the nested list into a single data frame, one row per record
+added_info <- do.call(rbind, lapply(nested_list, extract_added_info))
+
+# f1000AddedDate is treated as milliseconds since the Unix epoch. The time
+# zone is pinned to UTC so the year assigned to a record does not depend on
+# the time zone of the machine running the script.
+added_info <- added_info %>%
+  mutate(
+    f1000AddedDate = as.POSIXct(as.numeric(f1000AddedDate) / 1000,
+                                origin = "1970-01-01", tz = "UTC"),
+    Year = format(f1000AddedDate, "%Y")
+  ) %>%
+  filter(Year == as.character(report_year), !is.na(f1000AddedBy))
+
+# Count occurrences of each lab member
+added_by_count <- added_info %>%
+  count(f1000AddedBy, name = "Count") %>%
+  rename(Member = f1000AddedBy)
+
+plot_member_counts(
+  added_by_count,
+  paste("Number of Papers Added by Each Lab Member in", report_year)
+)
diff --git a/sciwheel_stats_process.R b/sciwheel_stats_process.R
index 49765c0..69c44ed 100644
--- a/sciwheel_stats_process.R
+++ b/sciwheel_stats_process.R
@@ -158,7 +158,7 @@ per.user.df= lapply(names(per.user),function(x) (enframe(per.user[[x]]) %>% muta
 
 #simple plot
 ggplot(per.user.df, aes(x= name, y= folder, fill = value))+
-  geom_tile()
+  geom_tile() + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
 
 # Notes --------------------------------------------------------------------------------
 
@@ -233,7 +233,7 @@ tags.per.club.df= lapply(names(tags.per.club),function(x) (enframe(tags.per.club
 
 #simple plot
 ggplot(tags.per.club.df, aes(x= name, y= folder, fill = value))+
-  geom_tile()
+  geom_tile() + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
 
 #### tags per user
 tags.per.user =
@@ -259,7 +259,7 @@ tags.per.user.df= lapply(names(tags.per.user),function(x) (do.call(rbind,tags.pe
 
 #simple plot
 ggplot(tags.per.user.df, aes(x= name, y= value, fill = n))+
-  geom_tile()
+  geom_tile() + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
 
 
 