From aa67669c097452608a28168af7e3fb7c3c452821 Mon Sep 17 00:00:00 2001
From: Nick-Eagles
Date: Tue, 3 Oct 2023 13:54:27 -0400
Subject: [PATCH] Add a script documenting how the 'job_info' example output was generated (including the anonymization of username and job name)

---
 dev/interactive_testing/clean_job_info.R | 41 ++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 dev/interactive_testing/clean_job_info.R

diff --git a/dev/interactive_testing/clean_job_info.R b/dev/interactive_testing/clean_job_info.R
new file mode 100644
index 0000000..66a0d7a
--- /dev/null
+++ b/dev/interactive_testing/clean_job_info.R
@@ -0,0 +1,41 @@
+library(here)
+library(dplyr)
+library(stringr)
+source(here('R', 'job_info.R'))
+
+# Documents how the example 'job_info' output was generated: sample running
+# jobs, anonymize the username and job name columns, and save the result as
+# 'inst/extdata/job_info_df.rds'.
+
+# Randomly grab 100 jobs running now. No partition is passed here, so this
+# relies on the default used by 'job_info()' (presumably the 'shared'
+# partition -- confirm against R/job_info.R)
+job_df = job_info(user = NULL) |>
+    sample_n(size = 100) |>
+    arrange(job_id)
+
+# A vector whose values are anonymous usernames and whose names are the
+# original usernames
+orig_users = unique(as.character(job_df$user))
+user_map = paste0('user', seq_along(orig_users))
+names(user_map) = orig_users
+
+# Similarly for job names, though we'll keep the generic name for interactive
+# jobs ('bash')
+orig_names = unique(as.character(job_df$name))
+name_map = paste0('my_job_', seq_along(orig_names))
+names(name_map) = orig_names
+name_map['bash'] = 'bash'
+
+# Anonymize username and job name. Indexing a named vector by name returns a
+# vector that still carries those names (here, the original values), so
+# 'unname()' is needed to keep the originals out of the saved object.
+# 'as.character()' guards against factor columns, which would otherwise index
+# the maps by integer code rather than by label
+job_df = job_df |>
+    mutate(
+        user = unname(user_map[as.character(user)]),
+        name = unname(name_map[as.character(name)])
+    )
+
+saveRDS(job_df, here('inst', 'extdata', 'job_info_df.rds'))