From 5f21d93b98fc49a26673303f84ac306e913ab679 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 25 Apr 2022 12:08:53 +0200 Subject: [PATCH 01/30] Add site_ID subdirectory in eager inputs/outputs --- README.md | 20 ++++++++++++-------- scripts/prepare_eager_tsv.R | 5 +++-- scripts/run_Eager.sh | 9 +++++---- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 4176f89..1ca875f 100644 --- a/README.md +++ b/README.md @@ -79,11 +79,13 @@ The eager input TSVs will be created in the following directory structure, given ``` eager_inputs ├── SG -│ ├── IND001 -│ └── IND002 +│ └──IND +│ ├── IND001 +│ └── IND002 └── TF - ├── IND001 - └── IND002 + └──IND + ├── IND001 + └── IND002 ``` ## run_eager.sh @@ -99,9 +101,11 @@ The outputs are saved with the same directory structure as the inputs, but in a ``` eager_outputs ├── SG -│ ├── IND001 -│ └── IND002 +│ └──IND +│ ├── IND001 +│ └── IND002 └── TF - ├── IND001 - └── IND002 + └──IND + ├── IND001 + └── IND002 ``` diff --git a/scripts/prepare_eager_tsv.R b/scripts/prepare_eager_tsv.R index 45ad63e..125309a 100755 --- a/scripts/prepare_eager_tsv.R +++ b/scripts/prepare_eager_tsv.R @@ -30,14 +30,15 @@ save_ind_tsv <- function(data, rename, output_dir, ...) { ## Infer Individual Id(s) from input. ind_id <- data %>% select(Sample_Name) %>% distinct() %>% pull() - + site_id <- substr(ind_id,1,3) + if (rename) { data <- data %>% mutate(Library_ID=str_replace_all(Library_ID, "[.]", "_")) %>% ## Replace dots in the Library_ID to underscores. 
select(Sample_Name, Library_ID, Lane, Colour_Chemistry, SeqType, Organism, Strandedness, UDG_Treatment, R1, R2, BAM) } - ind_dir <- paste0(output_dir,"/",ind_id) + ind_dir <- paste0(output_dir, "/", site_id, "/", ind_id) if (!dir.exists(ind_dir)) {write(paste0("[prepare_eager_tsv.R]: Creating output directory '",ind_dir,"'"), stdout())} diff --git a/scripts/run_Eager.sh b/scripts/run_Eager.sh index 4adcef5..fddc8bf 100755 --- a/scripts/run_Eager.sh +++ b/scripts/run_Eager.sh @@ -20,9 +20,11 @@ for analysis_type in "SG" "TF"; do # echo ${analysis_type} analysis_profiles="${nextflow_profiles},${analysis_type}" # echo "${root_input_dir}/${analysis_type}" - for eager_input in ${root_input_dir}/${analysis_type}/*/*.tsv; do + for eager_input in ${root_input_dir}/${analysis_type}/*/*/*.tsv; do ## Set output directory name from eager input name - eager_output_dir="${root_output_dir}/${analysis_type}/$(basename ${eager_input} .tsv)" + ind_id = $(basename ${eager_input} .tsv) + site_id = "${ind_id:0:3}" + eager_output_dir="${root_output_dir}/${analysis_type}/${site_id}/${ind_id}" # ## Run name is individual ID followed by analysis_type # run_name="$(basename ${eager_input} .tsv)_${analysis_type}" # echo $run_name @@ -43,9 +45,8 @@ for analysis_type in "SG" "TF"; do -resume" ## Actually run eager now. - ## Email the submitting user the resulting MultiQC report. ## Monitor run in nf tower. Only works if TOWER_ACCESS_TOKEN is set. - ## TODO Maybe an EVA_Autorun account can be made for tower, to monitor runs outside of users? 
+ ## Runs show in the Autorun_Eager workspace on tower.nf ${nxf_path}/nextflow run nf-core/eager \ -r ${eager_version} \ -profile ${analysis_profiles} \ From 5ad0f8a8dae2de597b3f7fa4be6ddacad6b60f25 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 25 Apr 2022 12:39:12 +0200 Subject: [PATCH 02/30] Minor bugfix --- scripts/run_Eager.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/run_Eager.sh b/scripts/run_Eager.sh index fddc8bf..1549f9c 100755 --- a/scripts/run_Eager.sh +++ b/scripts/run_Eager.sh @@ -22,8 +22,8 @@ for analysis_type in "SG" "TF"; do # echo "${root_input_dir}/${analysis_type}" for eager_input in ${root_input_dir}/${analysis_type}/*/*/*.tsv; do ## Set output directory name from eager input name - ind_id = $(basename ${eager_input} .tsv) - site_id = "${ind_id:0:3}" + ind_id=$(basename ${eager_input} .tsv) + site_id="${ind_id:0:3}" eager_output_dir="${root_output_dir}/${analysis_type}/${site_id}/${ind_id}" # ## Run name is individual ID followed by analysis_type # run_name="$(basename ${eager_input} .tsv)_${analysis_type}" From a874af68090df47eef7a88b8d6a688e91c7c08c4 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 25 Apr 2022 12:39:21 +0200 Subject: [PATCH 03/30] Fix indentation --- README.md | 48 +++++++++++++++--------------- scripts/prepare_eager_tsv.R | 58 ++++++++++++++++++------------------- 2 files changed, 53 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index 1ca875f..710c3c2 100644 --- a/README.md +++ b/README.md @@ -46,30 +46,30 @@ An R script that when given a sequencing batch ID, Autorun Analysis type and PAN Usage: ./prepare_eager_tsv.R [options] .credentials Options: - -h, --help - Show this help message and exit - - -s SEQUENCING_BATCH_ID, --sequencing_batch_id=SEQUENCING_BATCH_ID - The Pandora sequencing batch ID to update eager input for. 
A TSV file will be prepared - for each individual in this run, containing all relevant processed BAM files - from the individual - - -a ANALYSIS_TYPE, --analysis_type=ANALYSIS_TYPE - The analysis type to compile the data from. Should be one of: 'SG', 'TF'. - - -r, --rename - Changes all dots (.) in the Library_ID field of the output to underscores (_). - Some tools used in nf-core/eager will strip everything after the first dot (.) - from the name of the input file, which can cause naming conflicts in rare cases. - - -o OUTDIR/, --outDir=OUTDIR/ - The desired output directory. Within this directory, one subdirectory will be - created per analysis type, within that one subdirectory per individual ID, - and one TSV within each of these directory. - - -d, --debug_output - When provided, the entire result table for the run will be saved as '.results.txt'. - Helpful to check all the output data in one place. + -h, --help + Show this help message and exit + + -s SEQUENCING_BATCH_ID, --sequencing_batch_id=SEQUENCING_BATCH_ID + The Pandora sequencing batch ID to update eager input for. A TSV file will be prepared + for each individual in this run, containing all relevant processed BAM files + from the individual + + -a ANALYSIS_TYPE, --analysis_type=ANALYSIS_TYPE + The analysis type to compile the data from. Should be one of: 'SG', 'TF'. + + -r, --rename + Changes all dots (.) in the Library_ID field of the output to underscores (_). + Some tools used in nf-core/eager will strip everything after the first dot (.) + from the name of the input file, which can cause naming conflicts in rare cases. + + -o OUTDIR/, --outDir=OUTDIR/ + The desired output directory. Within this directory, one subdirectory will be + created per analysis type, within that one subdirectory per individual ID, + and one TSV within each of these directory. + + -d, --debug_output + When provided, the entire result table for the run will be saved as '.results.txt'. 
+ Helpful to check all the output data in one place. Note: a valid sidora .credentials file is required. Contact the Pandora/Sidora team for details. ``` diff --git a/scripts/prepare_eager_tsv.R b/scripts/prepare_eager_tsv.R index 125309a..6d2dad1 100755 --- a/scripts/prepare_eager_tsv.R +++ b/scripts/prepare_eager_tsv.R @@ -20,9 +20,9 @@ require(stringr) ## Validate analysis type option input validate_analysis_type <- function(option, opt_str, value, parser) { - valid_entries=c("TF", "SG") ## TODO comment: should this be embedded within the function? You would want to maybe update this over time no? + valid_entries <- c("TF", "SG") ## TODO comment: should this be embedded within the function? You would want to maybe update this over time no? ifelse(value %in% valid_entries, return(value), stop(call.=F, "\n[prepare_eager_tsv.R] error: Invalid analysis type: '", value, - "'\nAccepted values: ", paste(valid_entries,collapse=", "),"\n\n")) + "'\nAccepted values: ", paste(valid_entries,collapse=", "),"\n\n")) } ## Save one eager input TSV per individual. Rename if necessary. Input is already subset data. @@ -35,7 +35,7 @@ save_ind_tsv <- function(data, rename, output_dir, ...) { if (rename) { data <- data %>% mutate(Library_ID=str_replace_all(Library_ID, "[.]", "_")) %>% ## Replace dots in the Library_ID to underscores. select(Sample_Name, Library_ID, Lane, Colour_Chemistry, - SeqType, Organism, Strandedness, UDG_Treatment, R1, R2, BAM) + SeqType, Organism, Strandedness, UDG_Treatment, R1, R2, BAM) } ind_dir <- paste0(output_dir, "/", site_id, "/", ind_id) @@ -51,32 +51,32 @@ save_ind_tsv <- function(data, rename, output_dir, ...) { ## Parse arguments ---------------------------- parser <- OptionParser(usage = "%prog [options] .credentials") parser <- add_option(parser, c("-s", "--sequencing_batch_id"), type = 'character', - action = "store", dest = "sequencing_batch_id", - help = "The Pandora sequencing batch ID to update eager input for. 
A TSV file will be prepared + action = "store", dest = "sequencing_batch_id", + help = "The Pandora sequencing batch ID to update eager input for. A TSV file will be prepared for each individual in this run, containing all relevant processed BAM files from the individual") parser <- add_option(parser, c("-a", "--analysis_type"), type = 'character', - action = "callback", dest = "analysis_type", - callback = validate_analysis_type, default=NA, - help = "The analysis type to compile the data from. Should be one of: 'SG', 'TF'.") + action = "callback", dest = "analysis_type", + callback = validate_analysis_type, default=NA, + help = "The analysis type to compile the data from. Should be one of: 'SG', 'TF'.") parser <- add_option(parser, c("-r", "--rename"), type = 'logical', - action = 'store_true', dest = 'rename', default=F, - help = "Changes all dots (.) in the Library_ID field of the output to underscores (_). + action = 'store_true', dest = 'rename', default=F, + help = "Changes all dots (.) in the Library_ID field of the output to underscores (_). Some tools used in nf-core/eager will strip everything after the first dot (.) from the name of the input file, which can cause naming conflicts in rare cases." - ) + ) parser <- add_option(parser, c("-o", "--outDir"), type = 'character', - action = "store", dest = "outdir", - help= "The desired output directory. Within this directory, one subdirectory will be + action = "store", dest = "outdir", + help= "The desired output directory. Within this directory, one subdirectory will be created per analysis type, within that one subdirectory per individual ID, and one TSV within each of these directory." - ) + ) parser <- add_option(parser, c("-d", "--debug_output"), type = 'logical', - action = "store_true", dest = "debug", default=F, - help= "When provided, the entire result table for the run will be saved as '.results.txt'. 
+ action = "store_true", dest = "debug", default=F, + help= "When provided, the entire result table for the run will be saved as '.results.txt'. Helpful to check all the output data in one place." ) - + arguments <- parse_args(parser, positional_arguments = 1) opts <- arguments$options @@ -140,18 +140,18 @@ results <- inner_join(complete_pandora_table, tibble_input_iids, by=c("individua R2=NA ) %>% select( - "Sample_Name"=individual.Full_Individual_Id, - "Library_ID"=library.Full_Library_Id, - "Lane", - "Colour_Chemistry", - "SeqType", - "Organism"=individual.Organism, - "Strandedness", - "UDG_Treatment", - "R1", - "R2", - "BAM" - ) + "Sample_Name"=individual.Full_Individual_Id, + "Library_ID"=library.Full_Library_Id, + "Lane", + "Colour_Chemistry", + "SeqType", + "Organism"=individual.Organism, + "Strandedness", + "UDG_Treatment", + "R1", + "R2", + "BAM" + ) ## Save results into single file for debugging if ( opts$debug ) { write_tsv(results, file=paste0(sequencing_batch_id, ".", analysis_type, ".results.txt")) } From f9b2bfe285ebc1cd12fd60fd93da77b883e40f10 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 25 Apr 2022 12:43:23 +0200 Subject: [PATCH 04/30] linting --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 710c3c2..04a8c90 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # Autorun_eager -Automated nf-core/eager processing of Autorun output bams. +Automated nf-core/eager processing of Autorun output bams. -# Quickstart +## Quickstart - Run `prepare_eager_tsv.R` for human SG or TF data for a given sequencing batch: @@ -11,7 +11,7 @@ Automated nf-core/eager processing of Autorun output bams. 
prepare_eager_tsv.R -s 210802_K00233_0212_BHLH3FBBXY_SRdi_JR_BN -a TF -o eager_inputs/ -d .eva_credentials ``` -- Run eager with the following script, which then runs on the generated TSV files: +- Run eager with the following script, which then runs on the generated TSV files: ```bash run_eager.sh @@ -24,17 +24,17 @@ In such cases, an eager input TSV will still be created, but UDG treatment for a Contains the `autorun`, `SG` and `TF` profiles. -#### autorun +### autorun Broader scope options and parameters for use across all processing with autorun. Turns off automatic cleanup of intermediate files on successful completion of a run to allow resuming of the run when additional data becomes available, without rerunning completed steps. -#### SG +### SG The standardised parameters for processing human shotgun data. -#### TF +### TF The standardised parameters for processing human 1240k capture data. @@ -76,7 +76,7 @@ Note: a valid sidora .credentials file is required. Contact the Pandora/Sidora t The eager input TSVs will be created in the following directory structure, given `-o eager_inputs`: -``` +```text eager_inputs ├── SG │ └──IND @@ -98,7 +98,7 @@ data types. The outputs are saved with the same directory structure as the inputs, but in a separate parent directory. -``` +```text eager_outputs ├── SG │ └──IND From ff938bfd71784b5bef6eb99d2a08810e2f237fb4 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 25 Apr 2022 13:25:28 +0200 Subject: [PATCH 05/30] Added Changelog --- CHANGELOG.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..f09f790 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,34 @@ +# Autorun_eager: Changelog + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## [dev] - dd/mm/yyyy + +### `Added` + +### `Fixed` + +### `Dependencies` + +### `Deprecated` + +## [0.1.0] - 03/02/2022 + +Initial release of Autorun_eager. + +### `Added` + +- Configuration file with Autorun_eager parameter defaults in dedicated profiles for each analysis type. +- Script to prepare input TSV from pandora info, using Autorun outputted bams as input. +- Script to crawl through eager_inputs directory and run eager on each newly generated/updated input. +- cron script with the basic commands needed to run daily for full automation. + +### `Fixed` + +### `Dependencies` + +- [sidora.core](https://github.com/sidora-tools/sidora.core) +- [pandora2eager](https://github.com/sidora-tools/pandora2eager) + +### `Deprecated` From a4e0ec34846e79152eab1d0fc523cf895a8dc0a5 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 25 Apr 2022 13:58:33 +0200 Subject: [PATCH 06/30] Informative run names. --- scripts/run_Eager.sh | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/scripts/run_Eager.sh b/scripts/run_Eager.sh index 1549f9c..ffb6448 100755 --- a/scripts/run_Eager.sh +++ b/scripts/run_Eager.sh @@ -25,9 +25,17 @@ for analysis_type in "SG" "TF"; do ind_id=$(basename ${eager_input} .tsv) site_id="${ind_id:0:3}" eager_output_dir="${root_output_dir}/${analysis_type}/${site_id}/${ind_id}" - # ## Run name is individual ID followed by analysis_type - # run_name="$(basename ${eager_input} .tsv)_${analysis_type}" - # echo $run_name + + ## Give informative run names for easier trackingin tower.nf + ## If the output directory exists, assume you need to resume a run, else just name it + if [[ -d "${eager_output_dir}" ]]; then + command_string="-resume" + else + command_string="-name" + fi + ## Run name is individual ID followed by analysis_type. 
-resume or -name added as appropriate + run_name="${command_string} $(basename ${eager_input} .tsv)_${analysis_type}" + ## If no multiqc_report exists (last step of eager), or TSV is newer than the report, start an eager run. #### Always running with resume will ensure runs are only ever resumed instead of restarting. if [[ ${eager_input} -nt ${eager_output_dir}/multiqc/multiqc_report.html ]]; then @@ -42,7 +50,7 @@ for analysis_type in "SG" "TF"; do -w ${eager_output_dir}/work \ -with-tower \ -ansi-log false \ - -resume" + ${run_name}" ## Actually run eager now. ## Monitor run in nf tower. Only works if TOWER_ACCESS_TOKEN is set. @@ -56,7 +64,7 @@ for analysis_type in "SG" "TF"; do -w ${eager_output_dir}/work \ -with-tower \ -ansi-log false \ - -resume # ${run_name} + ${run_name} fi done done From f7d47f502d5334f6a2d5a0e21a0f927c469c9cb3 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 25 Apr 2022 15:25:22 +0200 Subject: [PATCH 07/30] Keep bams from specific autorun pipeline per analysis type. --- scripts/prepare_eager_tsv.R | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/scripts/prepare_eager_tsv.R b/scripts/prepare_eager_tsv.R index 6d2dad1..39b8b77 100755 --- a/scripts/prepare_eager_tsv.R +++ b/scripts/prepare_eager_tsv.R @@ -46,6 +46,18 @@ save_ind_tsv <- function(data, rename, output_dir, ...) { readr::write_tsv(data, file=paste0(ind_dir,"/",ind_id,".tsv")) ## Output structure can be changed here. } +## Correspondance between '-a' analysis type and the name of Kay's pipeline. +## Only bams from the output autorun_name will be included in the output +autorun_name_from_analysis_type <- function(analysis_type) { + autorun_name <- case_when( + analysis_type == "TF" ~ "HUMAN_1240K", + analysis_type == "SG" ~ "HUMAN_SHOTGUN", + ## Future analyses can be added here to pull those bams for eager processsing. 
+ TRUE ~ NA_character_ + ) + return(autorun_name) +} + ## MAIN ## ## Parse arguments ---------------------------- @@ -112,7 +124,7 @@ tibble_input_iids <- complete_pandora_table %>% filter(sequencing.Batch == seque ## Pull information from pandora, keeping only matching IIDs and requested Sequencing types. results <- inner_join(complete_pandora_table, tibble_input_iids, by=c("individual.Full_Individual_Id"="individual.Full_Individual_Id")) %>% - filter(grepl(paste0("\\.", analysis_type), sequencing.Full_Sequencing_Id)) %>% + filter(grepl(paste0("\\.", analysis_type), sequencing.Full_Sequencing_Id), analysis.Analysis_Id == autorun_name_from_analysis_type(analysis_type)) %>% select(individual.Full_Individual_Id,individual.Organism,library.Full_Library_Id,library.Protocol,analysis.Result_Directory,sequencing.Sequencing_Id,sequencing.Full_Sequencing_Id,sequencing.Single_Stranded) %>% distinct() %>% ## TODO comment: would be worrying if not already unique, maybe consider throwing a warn? group_by(individual.Full_Individual_Id) %>% From b342e3fda4e2a3852356185dba2704bdb78e6183 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 25 Apr 2022 15:26:15 +0200 Subject: [PATCH 08/30] Created EVA_Autorun RProject --- .gitignore | 1 + EVA_autorun.Rproj | 13 +++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 EVA_autorun.Rproj diff --git a/.gitignore b/.gitignore index b32e02b..62c31ab 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ eager_outputs/ .next* .RData .Rhistory +.Rproj.user diff --git a/EVA_autorun.Rproj b/EVA_autorun.Rproj new file mode 100644 index 0000000..8e3c2eb --- /dev/null +++ b/EVA_autorun.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX From 76c1ddd5b9cfacb64e2b4a75f4d24b07cdbff646 Mon Sep 17 00:00:00 2001 From: "Thiseas C. 
Lamnidis" Date: Mon, 25 Apr 2022 16:58:32 +0200 Subject: [PATCH 09/30] add nextflow temp files --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 62c31ab..69ff430 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ eager_outputs/ .RData .Rhistory .Rproj.user +.nfs* From a5dcb28f84cf137f8980103bd74741a535d2a8cb Mon Sep 17 00:00:00 2001 From: "Thiseas C. Lamnidis" Date: Mon, 25 Apr 2022 17:04:05 +0200 Subject: [PATCH 10/30] Added testing directories --- scripts/run_Eager.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/run_Eager.sh b/scripts/run_Eager.sh index ffb6448..bc44b19 100755 --- a/scripts/run_Eager.sh +++ b/scripts/run_Eager.sh @@ -7,6 +7,10 @@ root_input_dir='/mnt/archgen/Autorun_eager/eager_inputs' ## Directory should inc #### E.g. /mnt/archgen/Autorun_eager/eager_inputs/SG/GUB001/GUB001.tsv root_output_dir='/mnt/archgen/Autorun_eager/eager_outputs' +## Testing +# root_input_dir='/mnt/archgen/Autorun_eager/dev/testing/eager_inputs' ## Directory should include subdirectories for each analysis type (TF/SG) and sub-subdirectories for each individual. +# root_output_dir='/mnt/archgen/Autorun_eager/dev/testing/eager_outputs' + ## Set base profiles for EVA cluster. nextflow_profiles="eva,archgen,medium_data,autorun" From f931e45f65e304e41b09f6fa5012b252b15a3ad9 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 25 Apr 2022 17:16:22 +0200 Subject: [PATCH 11/30] Use eager 2.4.4 --- scripts/run_Eager.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run_Eager.sh b/scripts/run_Eager.sh index bc44b19..67c6303 100755 --- a/scripts/run_Eager.sh +++ b/scripts/run_Eager.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash nxf_path="/mnt/archgen/tools/nextflow/21.04.3.5560" -eager_version='2.4.2' +eager_version='2.4.4' autorun_config='/mnt/archgen/Autorun_eager/conf/Autorun.config' ## Contains specific profiles with params for each analysis type. 
root_input_dir='/mnt/archgen/Autorun_eager/eager_inputs' ## Directory should include subdirectories for each analysis type (TF/SG) and sub-subdirectories for each individual. #### E.g. /mnt/archgen/Autorun_eager/eager_inputs/SG/GUB001/GUB001.tsv From 06d09071a45eb7dae74e89b18fcaadaa63b05b76 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 25 Apr 2022 17:16:39 +0200 Subject: [PATCH 12/30] Update Changelog --- CHANGELOG.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f09f790..d998be3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,14 +3,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [dev] - dd/mm/yyyy +## [1.0.0] - 25/04/2022 ### `Added` +- Directory structure now includes a subdirectory with the site ID. +- Autorun_eager runs now have informative run names. This will make it easier for users to check the progress of their data in the nextflow tower workspace. + ### `Fixed` +- Fixed a bug where the bams of additional Autorun pipelines would be pulled for processing than intended. +- The sample names for single stranded libraries now include the suffix `_ss` in the Sample Name field. Avoids file name collisions and makes merging of genotypes easier and allows end users to pick between dsDNA and ssDNA genotypes for individuals where both are available. + ### `Dependencies` +- [nf-core/eager](https://github.com/nf-core/eager) `2.4.2` -> `2.4.4` ### `Deprecated` ## [0.1.0] - 03/02/2022 @@ -30,5 +37,6 @@ Initial release of Autorun_eager. 
- [sidora.core](https://github.com/sidora-tools/sidora.core) - [pandora2eager](https://github.com/sidora-tools/pandora2eager) +- [nf-core/eager](https://github.com/nf-core/eager) `2.4.2` ### `Deprecated` From fc28d59865c2a7bd2c0f58a5db95e9436f7d1f2a Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 25 Apr 2022 17:17:42 +0200 Subject: [PATCH 13/30] ssDNA libraries get _ss suffix in TSV --- scripts/prepare_eager_tsv.R | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/scripts/prepare_eager_tsv.R b/scripts/prepare_eager_tsv.R index 39b8b77..644e6ed 100755 --- a/scripts/prepare_eager_tsv.R +++ b/scripts/prepare_eager_tsv.R @@ -149,10 +149,16 @@ results <- inner_join(complete_pandora_table, tibble_input_iids, by=c("individua TRUE ~ inferred_udg ), R1=NA, - R2=NA - ) %>% + R2=NA, + ## Add `_ss` to sample name for ssDNA libraries. Avoids file name collisions and allows easier merging of genotypes for end users. + Sample_Name = case_when( + sequencing.Single_Stranded == 'yes' ~ paste0(individual.Full_Individual_Id, "_ss"), + TRUE ~ individual.Full_Individual_Id + ) + ) %>% + ungroup() %>% select( - "Sample_Name"=individual.Full_Individual_Id, + "Sample_Name", "Library_ID"=library.Full_Library_Id, "Lane", "Colour_Chemistry", From 9218a74e3aa90450ea996fd12b5efa14a8b3d72e Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 25 Apr 2022 17:39:57 +0200 Subject: [PATCH 14/30] ssDNA and dsDNA in same TSV per individual --- scripts/prepare_eager_tsv.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/prepare_eager_tsv.R b/scripts/prepare_eager_tsv.R index 644e6ed..3d0172f 100755 --- a/scripts/prepare_eager_tsv.R +++ b/scripts/prepare_eager_tsv.R @@ -29,7 +29,7 @@ validate_analysis_type <- function(option, opt_str, value, parser) { save_ind_tsv <- function(data, rename, output_dir, ...) { ## Infer Individual Id(s) from input. 
- ind_id <- data %>% select(Sample_Name) %>% distinct() %>% pull() + ind_id <- data %>% select(individual.Full_Individual_Id) %>% distinct() %>% pull() site_id <- substr(ind_id,1,3) if (rename) { @@ -43,7 +43,7 @@ save_ind_tsv <- function(data, rename, output_dir, ...) { if (!dir.exists(ind_dir)) {write(paste0("[prepare_eager_tsv.R]: Creating output directory '",ind_dir,"'"), stdout())} dir.create(ind_dir, showWarnings = F, recursive = T) ## Create output directory and subdirs if they do not exist. - readr::write_tsv(data, file=paste0(ind_dir,"/",ind_id,".tsv")) ## Output structure can be changed here. + readr::write_tsv(data %>% select(-individual.Full_Individual_Id), file=paste0(ind_dir,"/",ind_id,".tsv")) ## Output structure can be changed here. } ## Correspondance between '-a' analysis type and the name of Kay's pipeline. @@ -156,8 +156,8 @@ results <- inner_join(complete_pandora_table, tibble_input_iids, by=c("individua TRUE ~ individual.Full_Individual_Id ) ) %>% - ungroup() %>% select( + individual.Full_Individual_Id, ## Still used for grouping, so ss and ds results of the same sample end up in the same TSV. "Sample_Name", "Library_ID"=library.Full_Library_Id, "Lane", From 502daec0cd99030c3c3ca1ef70b5703ec59b562a Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Mon, 25 Apr 2022 17:46:18 +0200 Subject: [PATCH 15/30] Rush mode option --- scripts/run_Eager.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/scripts/run_Eager.sh b/scripts/run_Eager.sh index 67c6303..d10feaa 100755 --- a/scripts/run_Eager.sh +++ b/scripts/run_Eager.sh @@ -1,5 +1,12 @@ #!/usr/bin/env bash +## Flood execution. Useful for testing/fast processing of small batches. 
+if [[ $1 == "-r" || $1 == "--rush" ]]; then + rush="-bg" +else + rush='' +fi + nxf_path="/mnt/archgen/tools/nextflow/21.04.3.5560" eager_version='2.4.4' autorun_config='/mnt/archgen/Autorun_eager/conf/Autorun.config' ## Contains specific profiles with params for each analysis type. @@ -54,7 +61,7 @@ for analysis_type in "SG" "TF"; do -w ${eager_output_dir}/work \ -with-tower \ -ansi-log false \ - ${run_name}" + ${run_name} ${rush}" ## Actually run eager now. ## Monitor run in nf tower. Only works if TOWER_ACCESS_TOKEN is set. @@ -68,7 +75,7 @@ for analysis_type in "SG" "TF"; do -w ${eager_output_dir}/work \ -with-tower \ -ansi-log false \ - ${run_name} + ${run_name} ${rush} fi done done From 07b88eab9e4644790faec70cf5e5546e76f5683c Mon Sep 17 00:00:00 2001 From: "Thiseas C. Lamnidis" Date: Tue, 26 Apr 2022 11:56:09 +0200 Subject: [PATCH 16/30] Revert informative run names --- scripts/run_Eager.sh | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/scripts/run_Eager.sh b/scripts/run_Eager.sh index d10feaa..d2bdf09 100755 --- a/scripts/run_Eager.sh +++ b/scripts/run_Eager.sh @@ -37,15 +37,17 @@ for analysis_type in "SG" "TF"; do site_id="${ind_id:0:3}" eager_output_dir="${root_output_dir}/${analysis_type}/${site_id}/${ind_id}" - ## Give informative run names for easier trackingin tower.nf + run_name="-resume" ## To be changed once/if a way to give informative run names becomes available + + ## TODO Give informative run names for easier trackingin tower.nf ## If the output directory exists, assume you need to resume a run, else just name it - if [[ -d "${eager_output_dir}" ]]; then - command_string="-resume" - else - command_string="-name" - fi - ## Run name is individual ID followed by analysis_type. 
-resume or -name added as appropriate - run_name="${command_string} $(basename ${eager_input} .tsv)_${analysis_type}" + # if [[ -d "${eager_output_dir}" ]]; then + # command_string="-resume" + # else + # command_string="-name" + # fi + # ## Run name is individual ID followed by analysis_type. -resume or -name added as appropriate + # run_name="${command_string} $(basename ${eager_input} .tsv)_${analysis_type}" ## If no multiqc_report exists (last step of eager), or TSV is newer than the report, start an eager run. #### Always running with resume will ensure runs are only ever resumed instead of restarting. From 2d955d5d6a5b8dd21e49369025ac99158988da4f Mon Sep 17 00:00:00 2001 From: "Thiseas C. Lamnidis" Date: Tue, 26 Apr 2022 12:17:41 +0200 Subject: [PATCH 17/30] unique cwd per run --- scripts/run_Eager.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/run_Eager.sh b/scripts/run_Eager.sh index d2bdf09..7e97583 100755 --- a/scripts/run_Eager.sh +++ b/scripts/run_Eager.sh @@ -52,6 +52,9 @@ for analysis_type in "SG" "TF"; do ## If no multiqc_report exists (last step of eager), or TSV is newer than the report, start an eager run. #### Always running with resume will ensure runs are only ever resumed instead of restarting. if [[ ${eager_input} -nt ${eager_output_dir}/multiqc/multiqc_report.html ]]; then + + ## Change to input directory to run from, to keep one cwd per run. + cd $(dirname ${eager_input}) ## Debugging info. echo "Running eager on ${eager_input}:" echo "${nxf_path}/nextflow run nf-core/eager \ @@ -78,6 +81,8 @@ for analysis_type in "SG" "TF"; do -with-tower \ -ansi-log false \ ${run_name} ${rush} + + cd ${root_input_dir} ## Then back to root dir fi done done From 14d9fbba4727d00d9d5fde22a780469dff80440b Mon Sep 17 00:00:00 2001 From: "Thiseas C. 
Lamnidis" Date: Wed, 27 Apr 2022 11:47:33 +0200 Subject: [PATCH 18/30] Added dev dir used for testing --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 69ff430..d4d722d 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ eager_outputs/ .Rhistory .Rproj.user .nfs* +dev/ From f4fd605ecc146607915147ae543bebceb0340097 Mon Sep 17 00:00:00 2001 From: "Thiseas C. Lamnidis" Date: Wed, 27 Apr 2022 11:47:53 +0200 Subject: [PATCH 19/30] Revert informative run names --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d998be3..96cf63c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - Directory structure now includes a subdirectory with the site ID. -- Autorun_eager runs now have informative run names. This will make it easier for users to check the progress of their data in the nextflow tower workspace. ### `Fixed` From 75ef96beacd96ad8c4c6037009a91dfcd1254f21 Mon Sep 17 00:00:00 2001 From: "Thiseas C. Lamnidis" Date: Wed, 27 Apr 2022 12:03:48 +0200 Subject: [PATCH 20/30] Linting --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 96cf63c..2872c66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Dependencies` - [nf-core/eager](https://github.com/nf-core/eager) `2.4.2` -> `2.4.4` + ### `Deprecated` ## [0.1.0] - 03/02/2022 From e709f1df7cd6761e8567cdae4a26e6d719e94557 Mon Sep 17 00:00:00 2001 From: "Thiseas C. Lamnidis" Date: Thu, 28 Apr 2022 12:44:54 +0200 Subject: [PATCH 21/30] Back to 2.4.2. Seems to not have as many memory issues. 
--- scripts/run_Eager.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run_Eager.sh b/scripts/run_Eager.sh index 7e97583..6929058 100755 --- a/scripts/run_Eager.sh +++ b/scripts/run_Eager.sh @@ -8,7 +8,7 @@ else fi nxf_path="/mnt/archgen/tools/nextflow/21.04.3.5560" -eager_version='2.4.4' +eager_version='2.4.2' autorun_config='/mnt/archgen/Autorun_eager/conf/Autorun.config' ## Contains specific profiles with params for each analysis type. root_input_dir='/mnt/archgen/Autorun_eager/eager_inputs' ## Directory should include subdirectories for each analysis type (TF/SG) and sub-subdirectories for each individual. #### E.g. /mnt/archgen/Autorun_eager/eager_inputs/SG/GUB001/GUB001.tsv From 43331153caf6cc2b525ce4796ce972fd7a114d3a Mon Sep 17 00:00:00 2001 From: "Thiseas C. Lamnidis" Date: Thu, 28 Apr 2022 12:45:28 +0200 Subject: [PATCH 22/30] eager 2.4.2 --- CHANGELOG.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2872c66..fb76fbb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,8 +16,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Dependencies` -- [nf-core/eager](https://github.com/nf-core/eager) `2.4.2` -> `2.4.4` - ### `Deprecated` ## [0.1.0] - 03/02/2022 From 127c97bf7cc054c0f9c9998dd032e9ee1fa97864 Mon Sep 17 00:00:00 2001 From: "Thiseas C. Lamnidis" Date: Thu, 28 Apr 2022 13:18:48 +0200 Subject: [PATCH 23/30] Update scripts/prepare_eager_tsv.R Co-authored-by: James A. Fellows Yates --- scripts/prepare_eager_tsv.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/prepare_eager_tsv.R b/scripts/prepare_eager_tsv.R index 3d0172f..82a7cd8 100755 --- a/scripts/prepare_eager_tsv.R +++ b/scripts/prepare_eager_tsv.R @@ -43,7 +43,7 @@ save_ind_tsv <- function(data, rename, output_dir, ...) 
{ if (!dir.exists(ind_dir)) {write(paste0("[prepare_eager_tsv.R]: Creating output directory '",ind_dir,"'"), stdout())} dir.create(ind_dir, showWarnings = F, recursive = T) ## Create output directory and subdirs if they do not exist. - readr::write_tsv(data %>% select(-individual.Full_Individual_Id), file=paste0(ind_dir,"/",ind_id,".tsv")) ## Output structure can be changed here. + data %>% select(-individual.Full_Individual_Id) %>% readr::write_tsv(file=paste0(ind_dir,"/",ind_id,".tsv")) ## Output structure can be changed here. } ## Correspondance between '-a' analysis type and the name of Kay's pipeline. From 8c833379b62bd1ba1dbafa3dfe56fc66a260a125 Mon Sep 17 00:00:00 2001 From: "Thiseas C. Lamnidis" Date: Mon, 2 May 2022 12:02:42 +0200 Subject: [PATCH 24/30] Use conda nextflow instead of central one --- scripts/run_Eager.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run_Eager.sh b/scripts/run_Eager.sh index 6929058..f32f6c2 100755 --- a/scripts/run_Eager.sh +++ b/scripts/run_Eager.sh @@ -7,7 +7,7 @@ else rush='' fi -nxf_path="/mnt/archgen/tools/nextflow/21.04.3.5560" +nxf_path="/home/srv_autoeager/conda/envs/autoeager/bin/nextflow" eager_version='2.4.2' autorun_config='/mnt/archgen/Autorun_eager/conf/Autorun.config' ## Contains specific profiles with params for each analysis type. root_input_dir='/mnt/archgen/Autorun_eager/eager_inputs' ## Directory should include subdirectories for each analysis type (TF/SG) and sub-subdirectories for each individual. From 089687d9832efb129becf715f126477994ebecee Mon Sep 17 00:00:00 2001 From: "Thiseas C. 
Lamnidis" Date: Mon, 2 May 2022 12:04:30 +0200 Subject: [PATCH 25/30] Bugfix --- scripts/run_Eager.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run_Eager.sh b/scripts/run_Eager.sh index f32f6c2..50cff21 100755 --- a/scripts/run_Eager.sh +++ b/scripts/run_Eager.sh @@ -7,7 +7,7 @@ else rush='' fi -nxf_path="/home/srv_autoeager/conda/envs/autoeager/bin/nextflow" +nxf_path="/home/srv_autoeager/conda/envs/autoeager/bin/" eager_version='2.4.2' autorun_config='/mnt/archgen/Autorun_eager/conf/Autorun.config' ## Contains specific profiles with params for each analysis type. root_input_dir='/mnt/archgen/Autorun_eager/eager_inputs' ## Directory should include subdirectories for each analysis type (TF/SG) and sub-subdirectories for each individual. From f95d14792853adb0d7078d4704a142e6fcc3ae28 Mon Sep 17 00:00:00 2001 From: "Thiseas C. Lamnidis" Date: Thu, 11 Aug 2022 09:55:18 +0200 Subject: [PATCH 26/30] Bump eager to 2.4.5 --- scripts/run_Eager.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run_Eager.sh b/scripts/run_Eager.sh index 50cff21..e5aef24 100755 --- a/scripts/run_Eager.sh +++ b/scripts/run_Eager.sh @@ -8,7 +8,7 @@ else fi nxf_path="/home/srv_autoeager/conda/envs/autoeager/bin/" -eager_version='2.4.2' +eager_version='2.4.5' autorun_config='/mnt/archgen/Autorun_eager/conf/Autorun.config' ## Contains specific profiles with params for each analysis type. root_input_dir='/mnt/archgen/Autorun_eager/eager_inputs' ## Directory should include subdirectories for each analysis type (TF/SG) and sub-subdirectories for each individual. #### E.g. /mnt/archgen/Autorun_eager/eager_inputs/SG/GUB001/GUB001.tsv From a1067ee27caeb808010abaa2af39e14b49308072 Mon Sep 17 00:00:00 2001 From: "Thiseas C. Lamnidis" Date: Wed, 24 Aug 2022 14:52:40 +0200 Subject: [PATCH 27/30] Correctly output TSV for mixed ssDNA and dsDNA samples. 
oopsie --- scripts/prepare_eager_tsv.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/prepare_eager_tsv.R b/scripts/prepare_eager_tsv.R index 82a7cd8..101e928 100755 --- a/scripts/prepare_eager_tsv.R +++ b/scripts/prepare_eager_tsv.R @@ -154,12 +154,17 @@ results <- inner_join(complete_pandora_table, tibble_input_iids, by=c("individua Sample_Name = case_when( sequencing.Single_Stranded == 'yes' ~ paste0(individual.Full_Individual_Id, "_ss"), TRUE ~ individual.Full_Individual_Id + ), + ## Also add the suffix to the Sample_ID part of the Library_ID. This ensures that in the MultiQC report, the ssDNA libraries will be sorted after the ssDNA sample. + Library_ID = case_when( + sequencing.Single_Stranded == 'yes' ~ paste0(Sample_Name, ".", stringr::str_split_fixed(library.Full_Library_Id, "\\.", 2)[,2]), + TRUE ~ library.Full_Library_Id ) ) %>% select( individual.Full_Individual_Id, ## Still used for grouping, so ss and ds results of the same sample end up in the same TSV. "Sample_Name", - "Library_ID"=library.Full_Library_Id, + "Library_ID", "Lane", "Colour_Chemistry", "SeqType", @@ -175,4 +180,4 @@ results <- inner_join(complete_pandora_table, tibble_input_iids, by=c("individua if ( opts$debug ) { write_tsv(results, file=paste0(sequencing_batch_id, ".", analysis_type, ".results.txt")) } ## Group by individual IDs and save each chunk as TSV -results %>% group_by(Sample_Name) %>% group_walk(~save_ind_tsv(., rename=F, output_dir=output_dir), .keep=T) +results %>% group_by(individual.Full_Individual_Id) %>% group_walk(~save_ind_tsv(., rename=F, output_dir=output_dir), .keep=T) From e6eb011e89587f6075044f64f20cf3973c9adff7 Mon Sep 17 00:00:00 2001 From: "Thiseas C. 
Lamnidis" Date: Wed, 24 Aug 2022 14:53:17 +0200 Subject: [PATCH 28/30] Submit jobs to all.q --- conf/Autorun.config | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/conf/Autorun.config b/conf/Autorun.config index e141372..811adbd 100644 --- a/conf/Autorun.config +++ b/conf/Autorun.config @@ -9,6 +9,10 @@ profiles { config_profile_contact = 'Thiseas C. Lamnidis (@TCLamnidis)' config_profile_description = 'Autorun_eager profile for automated processing in EVA' } + + process { + queue = "all.q" + } } // Profile with parameters for runs using the Human_SG bams as input. From 8e2e79e3850bdcdc1251a517eb2f5de2d5835be3 Mon Sep 17 00:00:00 2001 From: "Thiseas C. Lamnidis" Date: Mon, 19 Sep 2022 13:59:24 +0200 Subject: [PATCH 29/30] Update CHANGELOG.md --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fb76fbb..700d5e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,19 +3,23 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [1.0.0] - 25/04/2022 +## [1.0.0] - 19/09/2022 ### `Added` - Directory structure now includes a subdirectory with the site ID. +- Jobs are now submitted to `all.q` ### `Fixed` - Fixed a bug where the bams of additional Autorun pipelines would be pulled for processing than intended. - The sample names for single stranded libraries now include the suffix `_ss` in the Sample Name field. Avoids file name collisions and makes merging of genotypes easier and allows end users to pick between dsDNA and ssDNA genotypes for individuals where both are available. +- Library names of single stranded libraries also include the suffix `_ss` in the Library Name field. This ensures that rows in the MultiQC report are sorted correctly. 
### `Dependencies` +- nf-core/eager=2.4.5 + ### `Deprecated` ## [0.1.0] - 03/02/2022 From e9fad0f0eaf42183c43625ae0fc3c609549d67e3 Mon Sep 17 00:00:00 2001 From: "Thiseas C. Lamnidis" Date: Mon, 19 Sep 2022 14:01:44 +0200 Subject: [PATCH 30/30] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 04a8c90..447ba4d 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,8 @@ Automated nf-core/eager processing of Autorun output bams. - Run `prepare_eager_tsv.R` for human SG or TF data for a given sequencing batch: ```bash - prepare_eager_tsv.R -s 210429_K00233_0191_AHKJHFBBXY_Jena0014 -a SG -o eager_inputs/ -d .eva_credentials - prepare_eager_tsv.R -s 210802_K00233_0212_BHLH3FBBXY_SRdi_JR_BN -a TF -o eager_inputs/ -d .eva_credentials + prepare_eager_tsv.R -s <sequencing_batch_id> -a SG -o eager_inputs/ -d .eva_credentials + prepare_eager_tsv.R -s <sequencing_batch_id> -a TF -o eager_inputs/ -d .eva_credentials ``` - Run eager with the following script, which then runs on the generated TSV files: @@ -93,7 +93,7 @@ eager_inputs A wrapper shell script that goes through all TSVs in the `eager_inputs` directory, checks if a completed run exists for a given TSV, and submits/resumes an eager run for that individual if necessary. -Currently uses eager version `2.4.2` and profiles `eva,archgen,medium_data,autorun` across all runs, with the `SG` or `TF` profiles used for their respective +Currently uses eager version `2.4.5` and profiles `eva,archgen,medium_data,autorun` across all runs, with the `SG` or `TF` profiles used for their respective data types. The outputs are saved with the same directory structure as the inputs, but in a separate parent directory.