diff --git a/CHANGELOG.md b/CHANGELOG.md index b6d1aee..6dc7a4a 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [Unreleased] ### Added +- Add GRIDSS2 variant calling - Add GRIDSS2 assembly - Add GRIDSS2 preprocessing - Add supported Nextflow version to `README.md` diff --git a/config/F16.config b/config/F16.config index 7f5f57d..5d5b91d 100644 --- a/config/F16.config +++ b/config/F16.config @@ -26,6 +26,17 @@ process { } } + withName: call_sSV_GRIDSS { + cpus = 8 + memory = 15.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + withName: call_sSV_Delly { cpus = 1 memory = 16.GB diff --git a/config/F2.config b/config/F2.config index 7727659..68120b8 100644 --- a/config/F2.config +++ b/config/F2.config @@ -36,7 +36,7 @@ process { } } } - + withName: call_sSV_Manta { cpus = 1 memory = 3.GB @@ -47,5 +47,4 @@ process { } } } - } - + } diff --git a/config/F32.config b/config/F32.config index 27604ab..e44941d 100644 --- a/config/F32.config +++ b/config/F32.config @@ -26,6 +26,17 @@ process { } } + withName: call_sSV_GRIDSS { + cpus = 8 + memory = 30.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + withName: call_sSV_Delly { cpus = 1 memory = 30.GB diff --git a/config/F72.config b/config/F72.config index 6f7835c..0c01903 100644 --- a/config/F72.config +++ b/config/F72.config @@ -26,6 +26,17 @@ process { } } + withName: call_sSV_GRIDSS { + cpus = 8 + memory = 30.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + withName: call_sSV_Delly { cpus = 1 memory = 30.GB diff --git a/config/M64.config b/config/M64.config index 29e2494..01c71d6 100644 --- a/config/M64.config +++ b/config/M64.config @@ -26,6 +26,17 @@ process { } } + withName: call_sSV_GRIDSS { + cpus = 8 + memory = 120.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + 
withName: call_sSV_Delly { cpus = 1 memory = 120.GB diff --git a/config/default.config b/config/default.config index c1496d9..14512e7 100644 --- a/config/default.config +++ b/config/default.config @@ -16,6 +16,8 @@ params { min_cpus = 1 min_memory = 1.MB + other_jvm_heap = 4.GB + cache_intermediate_pipeline_steps = false ucla_cds = true diff --git a/config/template.config b/config/template.config index 6971113..9bf6224 100755 --- a/config/template.config +++ b/config/template.config @@ -16,6 +16,8 @@ params { // GRCh38 blacklist - /hot/resource/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/ENCFF356LFX.bed gridss_blacklist = "/hot/resource/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/ENCFF356LFX.bed" gridss_reference_fasta = "/hot/resource/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta" + // If GRIDSS fails with an OutOfMemory error, increase `other_jvm_heap` (default: `4.GB`) + //other_jvm_heap = 4.GB exclusion_file = "/hot/resource/tool-specific-input/Delly/hg38/human.hg38.excl.tsv" diff --git a/main.nf b/main.nf index b118c84..a47f59e 100644 --- a/main.nf +++ b/main.nf @@ -55,7 +55,7 @@ include { call_sSV_Delly; filter_sSV_Delly } from './module/delly' addParams( include { call_sSV_Manta } from './module/manta' addParams( workflow_output_dir: "${params.output_dir_base}/Manta-${params.manta_version}" ) -include { preprocess_BAM_GRIDSS; run_assembly_GRIDSS } from './module/gridss' addParams( +include { preprocess_BAM_GRIDSS; run_assembly_GRIDSS; call_sSV_GRIDSS } from './module/gridss' addParams( workflow_output_dir: "${params.output_dir_base}/GRIDSS-${params.gridss_version}" ) include { generate_sha512 as generate_sha512_BCFtools } from './module/sha512' addParams( @@ -217,6 +217,21 @@ workflow { gridss_reference_files, params.gridss_blacklist ) - } + gridss_assembly_dir = run_assembly_GRIDSS.out.gridss_assembly + .flatten() + .map { parentdir -> parentdir.getParent() } + .unique() + .collect() + + call_sSV_GRIDSS( + 
input_paired_bams_ch, + gridss_preprocess_dir, + gridss_assembly_dir, + run_assembly_GRIDSS.out.gridss_assembly_bam, + params.gridss_reference_fasta, + gridss_reference_files, + params.gridss_blacklist + ) + } } diff --git a/module/gridss.nf b/module/gridss.nf index a79b935..ea6a5d6 100644 --- a/module/gridss.nf +++ b/module/gridss.nf @@ -14,6 +14,7 @@ process preprocess_BAM_GRIDSS { container params.docker_image_gridss publishDir "${params.workflow_output_dir}/intermediate/${task.process.replace(':', '/')}", + enabled: params.save_intermediate_files, pattern: "${bam_name}.gridss.working/*", mode: "copy", saveAs: { @@ -61,6 +62,7 @@ process run_assembly_GRIDSS { container params.docker_image_gridss publishDir "${params.workflow_output_dir}/intermediate/${task.process.replace(':', '/')}", + enabled: params.save_intermediate_files, pattern: "${tumor_id}.assembly.bam", mode: "copy", saveAs: { @@ -68,6 +70,7 @@ process run_assembly_GRIDSS { } publishDir "${params.workflow_output_dir}/intermediate/${task.process.replace(':', '/')}", + enabled: params.save_intermediate_files, pattern: "${tumor_id}.assembly.bam.gridss.working/*", mode: "copy", saveAs: { @@ -88,11 +91,11 @@ process run_assembly_GRIDSS { output: path "${tumor_id}.assembly.bam", emit: gridss_assembly_bam - path "${tumor_id}.assembly.bam.gridss.working/*" + path "${tumor_id}.assembly.bam.gridss.working/*", emit: gridss_assembly path ".command.*" script: - otherjvmheap = 4.GB + otherjvmheap = params.other_jvm_heap gridss_otherjvmheap = "${otherjvmheap.toGiga()}g" gridss_jvmheap = "${(task.memory - otherjvmheap).toGiga()}g" gridss_jar = "/usr/local/share/gridss-${params.gridss_version}-1/gridss.jar" @@ -118,3 +121,70 @@ process run_assembly_GRIDSS { ${tumor_bam} """ } + +process call_sSV_GRIDSS { + container params.docker_image_gridss + + publishDir "${params.workflow_output_dir}/output/", + pattern: "${tumor_id}.{vcf,vcf.idx}", + mode: "copy", + saveAs: { + 
"${output_filename}.${sanitize_string(file(it).getName().replace("${tumor_id}.", ""))}" + } + + publishDir "${params.workflow_output_dir}/intermediate/${task.process.replace(':', '/')}", + enabled: params.save_intermediate_files, + pattern: "${tumor_id}.vcf.gridss.working/*", + mode: "copy", + saveAs: { + "${output_filename}.vcf.gridss.working/${output_filename}.${sanitize_string(file(it).getName().replace("${tumor_id}.", ""))}" + } + + publishDir "${params.log_output_dir}/process-log", + pattern: ".command.*", + mode: "copy", + saveAs: { "${task.process.replace(':', '/')}/log${file(it).getName()}" } + + input: + tuple(val(tumor_id), path(tumor_bam), path(tumor_bai), path(normal_bam), path(normal_bai)) + path(gridss_preprocess_dir) + path(gridss_assembly_dir) + path(gridss_assembly_bam) + path(gridss_reference_fasta) + path(gridss_reference_files) + path(gridss_blacklist) + + output: + path "${tumor_id}.vcf", emit: gridss_vcf + path "${tumor_id}.vcf.idx", emit: gridss_vcf_idx + path "${tumor_id}.vcf.gridss.working/*", emit: gridss_vcf_dir + path ".command.*" + + script: + otherjvmheap = params.other_jvm_heap + gridss_otherjvmheap = "${otherjvmheap.toGiga()}g" + gridss_jvmheap = "${(task.memory - otherjvmheap).toGiga()}g" + gridss_jar = "/usr/local/share/gridss-${params.gridss_version}-1/gridss.jar" + output_filename = generate_standard_filename( + "GRIDSS2-${params.gridss_version}", + params.dataset_id, + tumor_id, + [:] + ) + + """ + set -euo pipefail + gridss \ + -r ${gridss_reference_fasta} \ + -j ${gridss_jar} \ + -s call \ + -t ${task.cpus} \ + --jvmheap ${gridss_jvmheap} \ + --otherjvmheap ${gridss_otherjvmheap} \ + -b ${gridss_blacklist} \ + -a ${gridss_assembly_bam} \ + --output ${tumor_id}.vcf \ + ${normal_bam} \ + ${tumor_bam} + """ + }