From 8ace68acf2c53e0d7065cb12259106d260a4fa6b Mon Sep 17 00:00:00 2001 From: Faizal-Eeman Date: Sun, 3 Nov 2024 09:06:59 -0800 Subject: [PATCH 01/15] emit gridss assembly --- module/gridss.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/gridss.nf b/module/gridss.nf index a79b935..53933de 100644 --- a/module/gridss.nf +++ b/module/gridss.nf @@ -88,7 +88,7 @@ process run_assembly_GRIDSS { output: path "${tumor_id}.assembly.bam", emit: gridss_assembly_bam - path "${tumor_id}.assembly.bam.gridss.working/*" + path "${tumor_id}.assembly.bam.gridss.working/*", emit: gridss_assembly path ".command.*" script: From b8f92eeecac0b644177303ece52d8e50fe9c16e2 Mon Sep 17 00:00:00 2001 From: Faizal-Eeman Date: Sun, 3 Nov 2024 09:07:30 -0800 Subject: [PATCH 02/15] get parent dir for gridss assembly --- main.nf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/main.nf b/main.nf index b118c84..4f10284 100644 --- a/main.nf +++ b/main.nf @@ -217,6 +217,12 @@ workflow { gridss_reference_files, params.gridss_blacklist ) + + gridss_assembly_dir = preprocess_BAM_GRIDSS.out.gridss_assembly + .flatten() + .map { parentdir -> parentdir.getParent() } + .unique() + .collect() } } From 49a03c35b9be4de9eba4a7b62bbf0b81a0fda789 Mon Sep 17 00:00:00 2001 From: Faizal-Eeman Date: Sun, 3 Nov 2024 09:51:07 -0800 Subject: [PATCH 03/15] add GRIDSS SV calling --- module/gridss.nf | 66 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/module/gridss.nf b/module/gridss.nf index 53933de..0951c9c 100644 --- a/module/gridss.nf +++ b/module/gridss.nf @@ -118,3 +118,69 @@ process run_assembly_GRIDSS { ${tumor_bam} """ } + +process call_sSV_GRIDSS { + container params.docker_image_gridss + + publishDir "${params.workflow_output_dir}/output/", + pattern: "${tumor_id}.{vcf, vcf.idx}", + mode: "copy", + saveAs: { + "${output_filename}_${sanitize_string(file(it).getName().replace("${tumor_id}.", ""))}" + } + + publishDir 
"${params.workflow_output_dir}/intermediate/${task.process.replace(':', '/')}", + pattern: "${tumor_id}.vcf.gridss.working/*", + mode: "copy", + saveAs: { + "${output_filename}.vcf.gridss.working/${output_filename}_${sanitize_string(file(it).getName().replace("${tumor_id}.", ""))}" + } + + publishDir "${params.log_output_dir}/process-log", + pattern: ".command.*", + mode: "copy", + saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } + + input: + tuple(val(tumor_id), path(tumor_bam), path(tumor_bai), path(normal_bam), path(normal_bai)) + path(gridss_preprocess_dir) + path(gridss_assembly_dir) + path(gridss_assembly_bam) + path(gridss_reference_fasta) + path(gridss_reference_files) + path(gridss_blacklist) + + output: + path "${tumor_id}.vcf", emit: gridss_vcf + path "${tumor_id}.vcf.idx", emit: gridss_vcf_idx + path "${tumor_id}.vcf.gridss.working/*", emit: gridss_vcf_dir + path ".command.*" + + script: + otherjvmheap = 4.GB + gridss_otherjvmheap = "${otherjvmheap.toGiga()}g" + gridss_jvmheap = "${(task.memory - otherjvmheap).toGiga()}g" + gridss_jar = "/usr/local/share/gridss-${params.gridss_version}-1/gridss.jar" + output_filename = generate_standard_filename( + "GRIDSS2-${params.gridss_version}", + params.dataset_id, + tumor_id, + [:] + ) + + """ + set -euo pipefail + gridss \ + -r ${gridss_reference_fasta} \ + -j ${gridss_jar} \ + -s call \ + -t ${task.cpus} \ + --jvmheap ${gridss_jvmheap} \ + --otherjvmheap ${gridss_otherjvmheap} \ + -b ${gridss_blacklist} \ + -a ${gridss_assembly_bam} \ + --output ${tumor_id}.vcf \ + ${normal_bam} \ + ${tumor_bam} + """ + } From 96557c497016b7a6e70451df426e4d987881880e Mon Sep 17 00:00:00 2001 From: Faizal-Eeman Date: Sun, 3 Nov 2024 09:52:36 -0800 Subject: [PATCH 04/15] add resource allocation for gridss sv calling in F16 --- config/F16.config | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/config/F16.config b/config/F16.config index 7f5f57d..5d5b91d 100644 --- a/config/F16.config +++ 
b/config/F16.config @@ -26,6 +26,17 @@ process { } } + withName: call_sSV_GRIDSS { + cpus = 8 + memory = 15.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + withName: call_sSV_Delly { cpus = 1 memory = 16.GB From dec08ae811fe9ed192943203b9f210efd1169613 Mon Sep 17 00:00:00 2001 From: Faizal-Eeman Date: Sun, 3 Nov 2024 09:56:39 -0800 Subject: [PATCH 05/15] add gridss SV calling to main --- main.nf | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/main.nf b/main.nf index 4f10284..0269d57 100644 --- a/main.nf +++ b/main.nf @@ -225,4 +225,13 @@ workflow { .collect() } + call_sSV_GRIDSS( + input_paired_bams_ch, + gridss_assembly_dir, + run_assembly_GRIDSS.out.gridss_assembly_bam, + params.gridss_reference_fasta, + gridss_reference_files, + params.gridss_blacklist + ) + } From a2423c28bc073aec55d18dd671ce60b384f56584 Mon Sep 17 00:00:00 2001 From: Faizal-Eeman Date: Tue, 5 Nov 2024 10:36:12 -0800 Subject: [PATCH 06/15] fix naming format --- module/gridss.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/module/gridss.nf b/module/gridss.nf index 0951c9c..19d9496 100644 --- a/module/gridss.nf +++ b/module/gridss.nf @@ -123,17 +123,17 @@ process call_sSV_GRIDSS { container params.docker_image_gridss publishDir "${params.workflow_output_dir}/output/", - pattern: "${tumor_id}.{vcf, vcf.idx}", + pattern: "${tumor_id}.{vcf,vcf.idx}", mode: "copy", saveAs: { - "${output_filename}_${sanitize_string(file(it).getName().replace("${tumor_id}.", ""))}" + "${output_filename}.${sanitize_string(file(it).getName().replace("${tumor_id}.", ""))}" } publishDir "${params.workflow_output_dir}/intermediate/${task.process.replace(':', '/')}", pattern: "${tumor_id}.vcf.gridss.working/*", mode: "copy", saveAs: { - "${output_filename}.vcf.gridss.working/${output_filename}_${sanitize_string(file(it).getName().replace("${tumor_id}.", ""))}" + 
"${output_filename}.vcf.gridss.working/${output_filename}.${sanitize_string(file(it).getName().replace("${tumor_id}.", ""))}" } publishDir "${params.log_output_dir}/process-log", From 33ca0772571727b0b3954ecbab24456729ecdfc5 Mon Sep 17 00:00:00 2001 From: Faizal-Eeman Date: Tue, 5 Nov 2024 10:40:04 -0800 Subject: [PATCH 07/15] fix call_sSV_GRIDSS function call --- main.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/main.nf b/main.nf index 0269d57..a47f59e 100644 --- a/main.nf +++ b/main.nf @@ -55,7 +55,7 @@ include { call_sSV_Delly; filter_sSV_Delly } from './module/delly' addParams( include { call_sSV_Manta } from './module/manta' addParams( workflow_output_dir: "${params.output_dir_base}/Manta-${params.manta_version}" ) -include { preprocess_BAM_GRIDSS; run_assembly_GRIDSS } from './module/gridss' addParams( +include { preprocess_BAM_GRIDSS; run_assembly_GRIDSS; call_sSV_GRIDSS } from './module/gridss' addParams( workflow_output_dir: "${params.output_dir_base}/GRIDSS-${params.gridss_version}" ) include { generate_sha512 as generate_sha512_BCFtools } from './module/sha512' addParams( @@ -218,20 +218,20 @@ workflow { params.gridss_blacklist ) - gridss_assembly_dir = preprocess_BAM_GRIDSS.out.gridss_assembly + gridss_assembly_dir = run_assembly_GRIDSS.out.gridss_assembly .flatten() .map { parentdir -> parentdir.getParent() } .unique() .collect() - } call_sSV_GRIDSS( input_paired_bams_ch, + gridss_preprocess_dir, gridss_assembly_dir, run_assembly_GRIDSS.out.gridss_assembly_bam, params.gridss_reference_fasta, gridss_reference_files, params.gridss_blacklist ) - + } } From cb1a3131ccb1920dcd238061a1a13b6f2e3f6eee Mon Sep 17 00:00:00 2001 From: Faizal-Eeman Date: Tue, 5 Nov 2024 11:13:22 -0800 Subject: [PATCH 08/15] update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b6d1aee..6dc7a4a 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ This project adheres to 
[Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [Unreleased] ### Added +- Add GRIDSS2 variant calling - Add GRIDSS2 assembly - Add GRIDSS2 preprocessing - Add supported Nextflow version to `README.md` From 2d357d2e25305f95062722df4fb5447a4ea074c3 Mon Sep 17 00:00:00 2001 From: Faizal-Eeman Date: Tue, 5 Nov 2024 11:14:05 -0800 Subject: [PATCH 09/15] fix spaces --- config/F2.config | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/config/F2.config b/config/F2.config index 7727659..68120b8 100644 --- a/config/F2.config +++ b/config/F2.config @@ -36,7 +36,7 @@ process { } } } - + withName: call_sSV_Manta { cpus = 1 memory = 3.GB @@ -47,5 +47,4 @@ process { } } } - } - + } From dc7b3bc773cce03c58083688aaeec9e69ecbaba6 Mon Sep 17 00:00:00 2001 From: Faizal-Eeman Date: Wed, 13 Nov 2024 15:17:22 -0800 Subject: [PATCH 10/15] parameterize other jvm heap --- config/template.config | 1 + module/gridss.nf | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/config/template.config b/config/template.config index 6971113..8b4c13c 100755 --- a/config/template.config +++ b/config/template.config @@ -16,6 +16,7 @@ params { // GRCh38 blacklist - /hot/resource/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/ENCFF356LFX.bed gridss_blacklist = "/hot/resource/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/ENCFF356LFX.bed" gridss_reference_fasta = "/hot/resource/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta" + other_jvm_heap = "4.GB" exclusion_file = "/hot/resource/tool-specific-input/Delly/hg38/human.hg38.excl.tsv" diff --git a/module/gridss.nf b/module/gridss.nf index 19d9496..c34284f 100644 --- a/module/gridss.nf +++ b/module/gridss.nf @@ -157,7 +157,7 @@ process call_sSV_GRIDSS { path ".command.*" script: - otherjvmheap = 4.GB + otherjvmheap = params.other_jvm_heap gridss_otherjvmheap = "${otherjvmheap.toGiga()}g" gridss_jvmheap = "${(task.memory - otherjvmheap).toGiga()}g" gridss_jar = 
"/usr/local/share/gridss-${params.gridss_version}-1/gridss.jar" From a13d8f9fb4ad917c11f1141f250b4a8a0dbc5890 Mon Sep 17 00:00:00 2001 From: Faizal-Eeman Date: Wed, 13 Nov 2024 15:26:12 -0800 Subject: [PATCH 11/15] use other jvm heap parameter --- module/gridss.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/gridss.nf b/module/gridss.nf index c34284f..6f0750e 100644 --- a/module/gridss.nf +++ b/module/gridss.nf @@ -92,7 +92,7 @@ process run_assembly_GRIDSS { path ".command.*" script: - otherjvmheap = 4.GB + otherjvmheap = params.other_jvm_heap gridss_otherjvmheap = "${otherjvmheap.toGiga()}g" gridss_jvmheap = "${(task.memory - otherjvmheap).toGiga()}g" gridss_jar = "/usr/local/share/gridss-${params.gridss_version}-1/gridss.jar" From 9e3d7c3bba0cc78c6ecba1a4b17053808eef2b81 Mon Sep 17 00:00:00 2001 From: Faizal-Eeman Date: Wed, 13 Nov 2024 15:35:17 -0800 Subject: [PATCH 12/15] add node configs --- config/F32.config | 11 +++++++++++ config/F72.config | 11 +++++++++++ config/M64.config | 11 +++++++++++ 3 files changed, 33 insertions(+) diff --git a/config/F32.config b/config/F32.config index 27604ab..e44941d 100644 --- a/config/F32.config +++ b/config/F32.config @@ -26,6 +26,17 @@ process { } } + withName: call_sSV_GRIDSS { + cpus = 8 + memory = 30.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + withName: call_sSV_Delly { cpus = 1 memory = 30.GB diff --git a/config/F72.config b/config/F72.config index 6f7835c..0c01903 100644 --- a/config/F72.config +++ b/config/F72.config @@ -26,6 +26,17 @@ process { } } + withName: call_sSV_GRIDSS { + cpus = 8 + memory = 30.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + withName: call_sSV_Delly { cpus = 1 memory = 30.GB diff --git a/config/M64.config b/config/M64.config index 29e2494..01c71d6 100644 --- a/config/M64.config +++ b/config/M64.config @@ -26,6 +26,17 @@ process { } } + withName: call_sSV_GRIDSS { + cpus = 8 + 
memory = 120.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + withName: call_sSV_Delly { cpus = 1 memory = 120.GB From acadc14bdadd03ab3c651ec60229061a244b1323 Mon Sep 17 00:00:00 2001 From: Faizal-Eeman Date: Wed, 13 Nov 2024 15:52:44 -0800 Subject: [PATCH 13/15] enable save_intermediate_files option --- module/gridss.nf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/module/gridss.nf b/module/gridss.nf index 6f0750e..ea6a5d6 100644 --- a/module/gridss.nf +++ b/module/gridss.nf @@ -14,6 +14,7 @@ process preprocess_BAM_GRIDSS { container params.docker_image_gridss publishDir "${params.workflow_output_dir}/intermediate/${task.process.replace(':', '/')}", + enabled: params.save_intermediate_files, pattern: "${bam_name}.gridss.working/*", mode: "copy", saveAs: { @@ -61,6 +62,7 @@ process run_assembly_GRIDSS { container params.docker_image_gridss publishDir "${params.workflow_output_dir}/intermediate/${task.process.replace(':', '/')}", + enabled: params.save_intermediate_files, pattern: "${tumor_id}.assembly.bam", mode: "copy", saveAs: { @@ -68,6 +70,7 @@ process run_assembly_GRIDSS { } publishDir "${params.workflow_output_dir}/intermediate/${task.process.replace(':', '/')}", + enabled: params.save_intermediate_files, pattern: "${tumor_id}.assembly.bam.gridss.working/*", mode: "copy", saveAs: { @@ -130,6 +133,7 @@ process call_sSV_GRIDSS { } publishDir "${params.workflow_output_dir}/intermediate/${task.process.replace(':', '/')}", + enabled: params.save_intermediate_files, pattern: "${tumor_id}.vcf.gridss.working/*", mode: "copy", saveAs: { From f1e0a459ee0080221ff322eb6c85a6f63c9a729e Mon Sep 17 00:00:00 2001 From: Faizal-Eeman Date: Wed, 13 Nov 2024 17:24:05 -0800 Subject: [PATCH 14/15] remove double quotes for other_jvm_heap --- config/template.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/template.config b/config/template.config index 8b4c13c..1174475 100755 --- a/config/template.config
+++ b/config/template.config @@ -16,7 +16,7 @@ params { // GRCh38 blacklist - /hot/resource/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/ENCFF356LFX.bed gridss_blacklist = "/hot/resource/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/ENCFF356LFX.bed" gridss_reference_fasta = "/hot/resource/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta" - other_jvm_heap = "4.GB" + other_jvm_heap = 4.GB exclusion_file = "/hot/resource/tool-specific-input/Delly/hg38/human.hg38.excl.tsv" From c2ba27d2f26817678c0af026ef23b1dcdd1f2059 Mon Sep 17 00:00:00 2001 From: Faizal-Eeman Date: Wed, 20 Nov 2024 11:27:41 -0800 Subject: [PATCH 15/15] set other_jvm_heap in default config and document in template.config --- config/default.config | 2 ++ config/template.config | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/config/default.config b/config/default.config index c1496d9..14512e7 100644 --- a/config/default.config +++ b/config/default.config @@ -16,6 +16,8 @@ params { min_cpus = 1 min_memory = 1.MB + other_jvm_heap = 4.GB + cache_intermediate_pipeline_steps = false ucla_cds = true diff --git a/config/template.config b/config/template.config index 1174475..9bf6224 100755 --- a/config/template.config +++ b/config/template.config @@ -16,7 +16,8 @@ params { // GRCh38 blacklist - /hot/resource/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/ENCFF356LFX.bed gridss_blacklist = "/hot/resource/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/ENCFF356LFX.bed" gridss_reference_fasta = "/hot/resource/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta" - other_jvm_heap = 4.GB + // If GRIDSS fails with an OutOfMemory error, adjust `other_jvm_heap`. The default is `4.GB`. + //other_jvm_heap = 4.GB exclusion_file = "/hot/resource/tool-specific-input/Delly/hg38/human.hg38.excl.tsv"