diff --git a/.gitignore b/.gitignore index 8cf857e..7d797ff 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ results/ testing* testing/ work/ +.nf-test.log diff --git a/CHANGELOG.md b/CHANGELOG.md index abde31e..446d7b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Initial release of nf-core/createpanelrefs, created with the [nf-core](https://n - [#5](https://github.com/nf-core/createpanelrefs/pull/5) - `CNVKIT` can be used to create a PON - [#5](https://github.com/nf-core/createpanelrefs/pull/5) - Usage of nf-validation - [#5](https://github.com/nf-core/createpanelrefs/pull/5) - Usage of nf-test +- [#8](https://github.com/nf-core/createpanelrefs/pull/8) - `Mutect2` can be used to create a PON ### `Fixed` diff --git a/conf/igenomes.config b/conf/igenomes.config index 614ef61..73e84d9 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -280,7 +280,6 @@ params { 'GATK.GRCh38' { bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAIndex/" bwamem2 = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/BWAmem2Index/" - cf_chrom_len = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Length/Homo_sapiens_assembly38.len" dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz" dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi" dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict" diff --git a/conf/modules/mutect2.config b/conf/modules/mutect2.config new file mode 100644 index 0000000..9bbbb9d --- /dev/null +++ b/conf/modules/mutect2.config @@ -0,0 +1,19 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module 
options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +process { + + withName: '.*BAM_CREATE_SOM_PON_GATK:GATK4_MUTECT2' { + ext.args = "--max-mnp-distance 0" + } + +} diff --git a/conf/test.config b/conf/test.config index 00bbe3b..3912e07 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,17 +20,20 @@ params { max_time = '6.h' // Input data - input = "${projectDir}/tests/csv/1.0.0/cram.csv" + input = "${projectDir}/tests/csv/1.0.0/bam.csv" // Main options tools = 'cnvkit' //Germlinecnvcaller options - gcnv_scatter_content = 2 - gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" + gcnv_scatter_content = 2 + gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" // Small reference genome - genome = null - igenomes_ignore = true - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" + genome = null + igenomes_ignore = true + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta" + dict = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.dict" + fai = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai" + target_bed = 
"https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed" } diff --git a/modules.json b/modules.json index 01b1871..49a3fcc 100644 --- a/modules.json +++ b/modules.json @@ -30,6 +30,11 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, + "gatk4/createsomaticpanelofnormals": { + "branch": "master", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", + "installed_by": ["bam_create_som_pon_gatk"] + }, "gatk4/determinegermlinecontigploidy": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", @@ -40,6 +45,11 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, + "gatk4/genomicsdbimport": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["bam_create_som_pon_gatk"] + }, "gatk4/germlinecnvcaller": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", @@ -50,6 +60,11 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, + "gatk4/mutect2": { + "branch": "master", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", + "installed_by": ["bam_create_som_pon_gatk"] + }, "gatk4/preprocessintervals": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", @@ -79,6 +94,11 @@ }, "subworkflows": { "nf-core": { + "bam_create_som_pon_gatk": { + "branch": "master", + "git_sha": "b0783b07beb65cac505fa6202e8f670437637b45", + "installed_by": ["subworkflows"] + }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml b/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml new file mode 100644 index 0000000..ae543c6 --- /dev/null +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/environment.yml @@ -0,0 +1,7 @@ +name: 
gatk4_createsomaticpanelofnormals +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf b/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf new file mode 100644 index 0000000..27a50dc --- /dev/null +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/main.nf @@ -0,0 +1,48 @@ +process GATK4_CREATESOMATICPANELOFNORMALS { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(genomicsdb) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK CreateSomaticPanelOfNormals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CreateSomaticPanelOfNormals \\ + --variant gendb://$genomicsdb \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + --tmp-dir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml b/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml new file mode 100644 index 0000000..9c3ee19 --- /dev/null +++ b/modules/nf-core/gatk4/createsomaticpanelofnormals/meta.yml @@ -0,0 +1,69 @@ +name: gatk4_createsomaticpanelofnormals +description: Create a panel of normals contraining germline and artifactual sites for use with mutect2. +keywords: + - createsomaticpanelofnormals + - gatk4 + - panelofnormals +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - genoomicsdb: + type: directory + description: genomicsDB workspace that contains the samples to create the somatic panel of normals with. + pattern: "*_genomicsDBworkspace" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test'] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test'] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test'] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" +output: + - vcf: + type: file + description: panel of normal as compressed vcf file + pattern: "*.vcf.gz" + - tbi: + type: file + description: Tabix index of vcf file + pattern: "*vcf.gz.tbi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/genomicsdbimport/environment.yml b/modules/nf-core/gatk4/genomicsdbimport/environment.yml new file mode 100644 index 0000000..a3a1363 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_genomicsdbimport +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/genomicsdbimport/main.nf b/modules/nf-core/gatk4/genomicsdbimport/main.nf new file mode 100644 index 0000000..6f1d4c5 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/main.nf @@ -0,0 +1,104 @@ +process GATK4_GENOMICSDBIMPORT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(vcf), path(tbi), path(interval_file), val(interval_value), path(wspace) + val run_intlist + val run_updatewspace + val input_map + + output: + tuple val(meta), path("$prefix") , optional:true, emit: genomicsdb + tuple val(meta), path("$updated_db") , optional:true, emit: updatedb + tuple val(meta), path("*.interval_list"), optional:true, emit: intervallist + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + // settings for running default create gendb mode + input_command = input_map ? "--sample-name-map ${vcf[0]}" : vcf.collect(){"--variant $it"}.join(' ') + + genomicsdb_command = "--genomicsdb-workspace-path ${prefix}" + interval_command = interval_file ? "--intervals ${interval_file}" : "--intervals ${interval_value}" + updated_db = "" + + // settings changed for running get intervals list mode if run_intlist is true + if (run_intlist) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = "--output-interval-list-to-file ${prefix}.interval_list" + } + + // settings changed for running update gendb mode. input_command same as default, update_db forces module to emit the updated gendb + if (run_updatewspace) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = '' + updated_db = "${wspace}" + } + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK GenomicsDBImport] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + GenomicsDBImport \\ + $input_command \\ + $genomicsdb_command \\ + $interval_command \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + + genomicsdb_command = "--genomicsdb-workspace-path ${prefix}" + interval_command = interval_file ? "--intervals ${interval_file}" : "--intervals ${interval_value}" + updated_db = "" + + // settings changed for running get intervals list mode if run_intlist is true + if (run_intlist) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = "--output-interval-list-to-file ${prefix}.interval_list" + } + + // settings changed for running update gendb mode. input_command same as default, update_db forces module to emit the updated gendb + if (run_updatewspace) { + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = '' + updated_db = "${wspace}" + } + + def stub_genomicsdb = genomicsdb_command == "--genomicsdb-workspace-path ${prefix}" ? "touch ${prefix}" : "" + def stub_interval = interval_command == "--output-interval-list-to-file ${prefix}.interval_list" ? "touch ${prefix}.interval_list" : "" + def stub_update = updated_db != "" ? 
"touch ${wspace}" : "" + + """ + ${stub_genomicsdb} + ${stub_interval} + ${stub_update} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/genomicsdbimport/meta.yml b/modules/nf-core/gatk4/genomicsdbimport/meta.yml new file mode 100644 index 0000000..11e565b --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/meta.yml @@ -0,0 +1,76 @@ +name: gatk4_genomicsdbimport +description: merge GVCFs from multiple samples. For use in joint genotyping or somatic panel of normal creation. +keywords: + - gatk4 + - genomicsdb + - genomicsdbimport + - jointgenotyping + - panelofnormalscreation +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - vcf: + type: list + description: either a list of vcf files to be used to create or update a genomicsdb, or a file that contains a map to vcf files to be used. + pattern: "*.vcf.gz" + - tbi: + type: list + description: list of tbi files that match with the input vcf files + pattern: "*.vcf.gz_tbi" + - wspace: + type: file + description: path to an existing genomicsdb to be used in update db mode or get intervals mode. This WILL NOT specify name of a new genomicsdb in create db mode. 
+ pattern: "/path/to/existing/gendb" + - interval_file: + type: file + description: file containing the intervals to be used when creating the genomicsdb + pattern: "*.interval_list" + - interval_value: + type: string + description: if an intervals file has not been spcified, the value enetered here will be used as an interval via the "-L" argument + pattern: "example: chr1:1000-10000" + - run_intlist: + type: boolean + description: Specify whether to run get interval list mode, this option cannot be specified at the same time as run_updatewspace. + pattern: "true/false" + - run_updatewspace: + type: boolean + description: Specify whether to run update genomicsdb mode, this option takes priority over run_intlist. + pattern: "true/false" + - input_map: + type: boolean + description: Specify whether the vcf input is providing a list of vcf file(s) or a single file containing a map of paths to vcf files to be used to create or update a genomicsdb. + pattern: "*.sample_map" +output: + - genomicsdb: + type: directory + description: Directory containing the files that compose the genomicsdb workspace, this is only output for create mode, as update changes an existing db + pattern: "*/$prefix" + - updatedb: + type: directory + description: Directory containing the files that compose the updated genomicsdb workspace, this is only output for update mode, and should be the same path as the input wspace. + pattern: "same/path/as/wspace" + - intervallist: + type: file + description: File containing the intervals used to generate the genomicsdb, only created by get intervals mode. 
+ pattern: "*.interval_list" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test b/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test new file mode 100644 index 0000000..9c207b3 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test @@ -0,0 +1,155 @@ +nextflow_process { + + name "Test Process GATK4_GENOMICSDBIMPORT" + script "../main.nf" + process "GATK4_GENOMICSDBIMPORT" + + tag "modules" + tag "modules_nfcore" + tag "untar" + tag "gatk4" + tag "gatk4/genomicsdbimport" + + test("test_gatk4_genomicsdbimport_create_genomicsdb") { + + when { + process { + """ + // [meta, vcf, tbi, interval, interval_value, workspace ] + input[0] = [ [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) , + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true) , + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.interval_list', checkIfExists: true) , + [] , + [] ] + // run_intlist + input[1] = false + // run_updatewspace + input[2] = false + // input_map + input[3] = false + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.genomicsdb.get(0).get(1)).list().sort()).match() } + //{ assert snapshot(file(process.out.updatedb.get(0).get(1)).list().sort()).match() } + //{ assert snapshot(process.out.intervallist.get(0).get(1)).match() } + ) + } + + } + + test("test_gatk4_genomicsdbimport_get_intervalslist") { + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = Channel.of([ [], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz', checkIfExists: true) ]) + """ 
+ } + } + } + + when { + process { + """ + input[0] = Channel.of([ [id:"test"], [], [], [], []]).combine(UNTAR.out.untar.map{ it[1] }) + // run_intlist + input[1] = true + // run_updatewspace + input[2] = false + // input_map + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + //{ assert snapshot(file(process.out.genomicsdb.get(0).get(1)).list().sort()).match() } + //{ assert snapshot(file(process.out.updatedb.get(0).get(1)).list().sort()).match() } + { assert snapshot(process.out.intervallist.get(0).get(1)).match() } + ) + } + + } + + test("test_gatk4_genomicsdbimport_update_genomicsdb") { + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = Channel.of([ [], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz', checkIfExists: true) ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ [id:"test"], file( params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz' , checkIfExists: true), file( params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz.tbi' , checkIfExists: true), [], []]).combine(UNTAR.out.untar.map{ it[1] }) + // run_intlist + input[1] = false + // run_updatewspace + input[2] = true + // input_map + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + //{ assert snapshot(file(process.out.genomicsdb.get(0).get(1)).list().sort()).match() } + { assert snapshot(file(process.out.updatedb.get(0).get(1)).list().sort()).match() } + //{ assert snapshot(process.out.intervallist.get(0).get(1)).match() } + ) + } + + } + + test("test_gatk4_genomicsdbimport_stub") { + + options "-stub" + + when { + process { + """ + // [meta, vcf, tbi, interval, interval_value, workspace ] + input[0] = [ [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: 
true) , + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true) , + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.interval_list', checkIfExists: true) , + [] , + [] ] + // run_intlist + input[1] = false + // run_updatewspace + input[2] = false + // input_map + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap b/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap new file mode 100644 index 0000000..a633bbd --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/tests/main.nf.test.snap @@ -0,0 +1,40 @@ +{ + "test_gatk4_genomicsdbimport_get_intervalslist": { + "content": [ + "test.interval_list:md5,4c85812ac15fc1cd29711a851d23c0bf" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-28T17:55:03.846241" + }, + "test_gatk4_genomicsdbimport_create_genomicsdb": { + "content": [ + "__tiledb_workspace.tdb", + "callset.json", + "chr22$1$40001", + "vcfheader.vcf", + "vidmap.json" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T11:22:10.11423157" + }, + "test_gatk4_genomicsdbimport_update_genomicsdb": { + "content": [ + "__tiledb_workspace.tdb", + "callset.json", + "chr22$1$40001", + "vcfheader.vcf", + "vidmap.json" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T12:46:42.403794676" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/genomicsdbimport/tests/nextflow.config b/modules/nf-core/gatk4/genomicsdbimport/tests/nextflow.config new file mode 100644 index 0000000..e177a14 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/tests/nextflow.config @@ -0,0 +1,2 @@ +process { +} diff --git a/modules/nf-core/gatk4/genomicsdbimport/tests/tags.yml 
b/modules/nf-core/gatk4/genomicsdbimport/tests/tags.yml new file mode 100644 index 0000000..8a00857 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/tests/tags.yml @@ -0,0 +1,3 @@ +gatk4/genomicsdbimport: + - "modules/nf-core/gatk4/genomicsdbimport/**" + - "modules/nf-core/untar/**" diff --git a/modules/nf-core/gatk4/mutect2/environment.yml b/modules/nf-core/gatk4/mutect2/environment.yml new file mode 100644 index 0000000..86f4bfa --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_mutect2 +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/mutect2/main.nf b/modules/nf-core/gatk4/mutect2/main.nf new file mode 100644 index 0000000..79d8d28 --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/main.nf @@ -0,0 +1,75 @@ +process GATK4_MUTECT2 { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(input), path(input_index), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + path(germline_resource) + path(germline_resource_tbi) + path(panel_of_normals) + path(panel_of_normals_tbi) + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + tuple val(meta), path("*.stats") , emit: stats + tuple val(meta), path("*.f1r2.tar.gz"), optional:true, emit: f1r2 + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def inputs = input.collect{ "--input $it"}.join(" ") + def interval_command = intervals ? 
"--intervals $intervals" : "" + def pon_command = panel_of_normals ? "--panel-of-normals $panel_of_normals" : "" + def gr_command = germline_resource ? "--germline-resource $germline_resource" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + Mutect2 \\ + $inputs \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + $pon_command \\ + $gr_command \\ + $interval_command \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + touch ${prefix}.vcf.gz.stats + touch ${prefix}.f1r2.tar.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/mutect2/meta.yml b/modules/nf-core/gatk4/mutect2/meta.yml new file mode 100644 index 0000000..21c928e --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/meta.yml @@ -0,0 +1,107 @@ +name: gatk4_mutect2 +description: Call somatic SNVs and indels via local assembly of haplotypes. +keywords: + - gatk4 + - haplotype + - indels + - mutect2 + - snvs + - somatic +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - input: + type: list + description: list of BAM files, also able to take CRAM as an input + pattern: "*.{bam/cram}" + - input_index: + type: list + description: list of BAM file indexes, also able to take CRAM indexes as an input + pattern: "*.{bam.bai/cram.crai}" + - intervals: + type: file + description: Specify region the tools is run on. + pattern: ".{bed,interval_list}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - germline_resource: + type: file + description: Population vcf of germline sequencing, containing allele fractions. + pattern: "*.vcf.gz" + - germline_resource_tbi: + type: file + description: Index file for the germline resource. + pattern: "*.vcf.gz.tbi" + - panel_of_normals: + type: file + description: vcf file to be used as a panel of normals. + pattern: "*.vcf.gz" + - panel_of_normals_tbi: + type: file + description: Index for the panel of normals. 
+ pattern: "*.vcf.gz.tbi" +output: + - vcf: + type: file + description: compressed vcf file + pattern: "*.vcf.gz" + - tbi: + type: file + description: Index of vcf file + pattern: "*vcf.gz.tbi" + - stats: + type: file + description: Stats file that pairs with output vcf file + pattern: "*vcf.gz.stats" + - f1r2: + type: file + description: file containing information to be passed to LearnReadOrientationModel (only outputted when tumor_normal_pair mode is run) + pattern: "*.f1r2.tar.gz" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GCJMackenzie" + - "@ramprasadn" +maintainers: + - "@GCJMackenzie" + - "@ramprasadn" diff --git a/nextflow.config b/nextflow.config index 7f0a4d1..a837e6e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,17 +10,22 @@ params { // Input options - input = null + input = null + // References - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes/' - igenomes_ignore = false - fasta = null - fai = null - dict = null + genome = null + igenomes_base = 's3://ngi-igenomes/igenomes/' + igenomes_ignore = false + fasta = null + fai = null + dict = null + target_bed = null // Building Panel of Normals and models - tools = null // No default, must be specified + tools = null // No default, must be specified + + // Mutect2 options + mutect2_pon_name = null // Germlinecnvcaller options gcnv_analysis_type = 'wgs' @@ -45,36 +50,36 @@ params { multiqc_methods_description = null // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - version = false + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false // Config options - config_profile_name = null - config_profile_description = null - custom_config_version = 
'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null + config_profile_name = null + config_profile_description = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Max resource options // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' // Schema validation default options - validationFailUnrecognisedParams = false - validationLenientMode = false - validationSchemaIgnoreParams = 'genomes,igenomes_base' - validationShowHiddenParams = false - validate_params = true + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes,igenomes_base' + validationShowHiddenParams = false + validate_params = true } @@ -97,95 +102,95 @@ try { // } profiles { debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false nextflow.enable.configProcessNamesValidation = true } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - channels = ['conda-forge', 'bioconda', 'defaults'] - apptainer.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - 
charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - docker.runOptions = '-u $(id -u):$(id -g)' + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled 
= false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false } apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false } gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } @@ -194,10 +199,10 @@ profiles { // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Singularity are enabled // Set to your registry if you have a mirror of containers -apptainer.registry = 'quay.io' -docker.registry = 'quay.io' -podman.registry = 'quay.io' -singularity.registry = 'quay.io' +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' // Nextflow plugins plugins { @@ -215,10 +220,10 @@ if (!params.igenomes_ignore) { // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
env { - PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" - JULIA_DEPOT_PATH = "/usr/local/share/julia" + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" } // Capture exit codes from upstream processes when piping @@ -261,6 +266,7 @@ includeConfig 'conf/modules/base.config' includeConfig 'conf/modules/cnvkit.config' includeConfig 'conf/modules/germlinecnvcaller_cohort.config' includeConfig 'conf/modules/gens_pon.config' +includeConfig 'conf/modules/mutect2.config' // Function to ensure that resource requirements don't go beyond // a maximum limit diff --git a/nextflow_schema.json b/nextflow_schema.json index 1a2daee..f03260a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -142,6 +142,17 @@ } } }, + "mutect2_options": { + "title": "Mutect2 options", + "type": "object", + "description": "Options used by the mutect2 subworkflow", + "default": "", + "properties": { + "mutect2_pon_name": { + "type": "string" + } + } + }, "input_output_options": { "title": "Input/output options", "type": "object", @@ -192,14 +203,15 @@ "title": "Main options", "type": "object", "description": "Most common options used for the pipeline", + "required": ["tools"], "default": "", "properties": { "tools": { "type": "string", "fa_icon": "fas fa-toolbox", "description": "Tools to use for building Panel of Normals or models.", - "help_text": "Multiple tools separated with commas.\n\nTools available: CNVKIT,germlinecnvcaller", - "pattern": "^((cnvkit|germlinecnvcaller|gens)?,?)*(? 
return dict}.toList() + + ch_gendb_input = Channel.of([id:val_pon_norm]) + .combine(ch_vcf) + .combine(ch_index) + .combine(ch_gendb_intervals) + .combine(ch_dict_gendb) + .map{meta, vcf, tbi, interval, dict -> [meta, vcf, tbi, interval, [], dict]} + + GATK4_GENOMICSDBIMPORT ( ch_gendb_input, false, false, false ) + ch_versions = ch_versions.mix(GATK4_GENOMICSDBIMPORT.out.versions.first()) + + // + //Panel of normals made from genomicsdb workspace using createsomaticpanelofnormals. + // + GATK4_CREATESOMATICPANELOFNORMALS ( GATK4_GENOMICSDBIMPORT.out.genomicsdb, ch_fasta, ch_fai, ch_dict ) + ch_versions = ch_versions.mix(GATK4_CREATESOMATICPANELOFNORMALS.out.versions.first()) + + emit: + mutect2_vcf = GATK4_MUTECT2.out.vcf // channel: [ val(meta), path(vcf) ] + mutect2_index = GATK4_MUTECT2.out.tbi // channel: [ val(meta), path(tbi) ] + mutect2_stats = GATK4_MUTECT2.out.stats // channel: [ val(meta), path(stats) ] + genomicsdb = GATK4_GENOMICSDBIMPORT.out.genomicsdb // channel: [ val(meta), path(genomicsdb) ] + pon_vcf = GATK4_CREATESOMATICPANELOFNORMALS.out.vcf // channel: [ val(meta), path(vcf) ] + pon_index = GATK4_CREATESOMATICPANELOFNORMALS.out.tbi // channel: [ val(meta), path(tbi) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml b/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml new file mode 100644 index 0000000..2660836 --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml @@ -0,0 +1,69 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_create_som_pon_gatk +description: Perform variant calling on a set of normal samples using mutect2 panel of normals mode. Group them into a genomicsdbworkspace using genomicsdbimport, then use this to create a panel of normals using createsomaticpanelofnormals. 
+keywords: + - gatk4 + - mutect2 + - genomicsdbimport + - createsomaticpanelofnormals + - variant_calling + - genomicsdb_workspace + - panel_of_normals +components: + - gatk4/mutect2 + - gatk4/genomicsdbimport + - gatk4/createsomaticpanelofnormals +input: + - ch_mutect2_in: + type: list + description: | + An input channel containing the following files: + - input: One or more BAM/CRAM files + - input_index: The index/indices from the BAM/CRAM file(s) + - interval_file: An interval file to be used with the mutect call + Structure: [ meta, input, input_index, interval_file ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - mutect2_vcf: + type: list + description: List of compressed vcf files to be used to make the gendb workspace + pattern: "[ *.vcf.gz ]" + - mutect2_index: + type: list + description: List of indexes of mutect2_vcf files + pattern: "[ *vcf.gz.tbi ]" + - mutect2_stats: + type: list + description: List of stats files that pair with mutect2_vcf files + pattern: "[ *vcf.gz.stats ]" + - genomicsdb: + type: directory + description: Directory containing the files that compose the genomicsdb workspace. 
+ pattern: "path/name_of_workspace" + - pon_vcf: + type: file + description: Panel of normals as compressed vcf file + pattern: "*.vcf.gz" + - pon_index: + type: file + description: Index of pon_vcf file + pattern: "*vcf.gz.tbi" +authors: + - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test new file mode 100644 index 0000000..57aee89 --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test @@ -0,0 +1,58 @@ +nextflow_workflow { + + name "Test Subworkflow BAM_CREATE_SOM_PON_GATK" + script "../main.nf" + config "./nextflow.config" + + workflow "BAM_CREATE_SOM_PON_GATK" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/bam_create_som_pon_gatk" + tag "gatk4" + tag "gatk4/mutect2" + tag "gatk4/genomicsdbimport" + tag "gatk4/createsomaticpanelofnormals" + + test("test_create_som_pon_gatk_bam") { + when { + workflow { + """ + // ch_mutect2_in + input[0] = Channel.of([ + [ id:'test1' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), + [] ], + [ + [ id:'test2' ], + file(params.modules_testdata_base_path+ 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path+ 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), + [] ] + ) + // ch_fasta + input[1] = Channel.value([ [ id:'genome' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)]) + // ch_fai + input[2] = Channel.value([ [ id:'genome' ], file(params.modules_testdata_base_path +
'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true)]) + // ch_dict + input[3] = Channel.value([ [ id:'genome' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.dict', checkIfExists:true)]) + // val_pon_norm + input[4] = "test_panel" + // ch_gendb_intervals + input[5] = Channel.value(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', checkIfExists: true)) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(file(workflow.out.mutect2_vcf.get(0).get(1)).name).match("test1.vcf.gz") }, + { assert snapshot(file(workflow.out.mutect2_index.get(0).get(1)).name).match("test1.vcf.gz.tbi") }, + { assert snapshot(file(workflow.out.mutect2_stats.get(0).get(1)).name).match("test1.vcf.gz.stats") }, + { assert snapshot(file(workflow.out.pon_vcf.get(0).get(1)).name).match("test_panel.vcf.gz") }, + { assert snapshot(file(workflow.out.pon_index.get(0).get(1)).name).match("test_panel.vcf.gz.tbi") }, + ) + } + } +} diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test.snap b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test.snap new file mode 100644 index 0000000..4c0d88a --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test.snap @@ -0,0 +1,52 @@ +{ + "test_panel.vcf.gz": { + "content": [ + "test_panel.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T06:59:54.103667303" + }, + "test1.vcf.gz.stats": { + "content": [ + "test1.vcf.gz.stats" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T06:59:54.102164313" + }, + "test_panel.vcf.gz.tbi": { + "content": [ + "test_panel.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T06:59:54.105382853" + }, + "test1.vcf.gz": { + "content": [ + "test1.vcf.gz" + ], +
"meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T06:59:54.098085724" + }, + "test1.vcf.gz.tbi": { + "content": [ + "test1.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T06:59:54.100765684" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/nextflow.config b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/nextflow.config new file mode 100644 index 0000000..6a98618 --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GATK4_MUTECT2 { + ext.args = "--max-mnp-distance 0" + } +} diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/tags.yml b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/tags.yml new file mode 100644 index 0000000..bb1b93c --- /dev/null +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_create_som_pon_gatk: + - subworkflows/nf-core/bam_create_som_pon_gatk/** diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config index 0fa4aba..d0a926b 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -3,7 +3,7 @@ manifest { author = """nf-core""" homePage = 'https://127.0.0.1' description = """Dummy pipeline""" - nextflowVersion = '!>=23.10.0' + nextflowVersion = '!>=23.04.0' version = '9.9.9' doi = 'https://doi.org/10.5281/zenodo.5070524' } diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config index 0fa4aba..d0a926b 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -3,7 +3,7 @@ manifest { author = 
"""nf-core""" homePage = 'https://127.0.0.1' description = """Dummy pipeline""" - nextflowVersion = '!>=23.10.0' + nextflowVersion = '!>=23.04.0' version = '9.9.9' doi = 'https://doi.org/10.5281/zenodo.5070524' } diff --git a/tests/config/tags.yml b/tests/config/tags.yml index 3672b33..2deb666 100644 --- a/tests/config/tags.yml +++ b/tests/config/tags.yml @@ -10,7 +10,16 @@ default: cnvkit: - conf/modules/cnvkit.config - - modules/nf-core/cnvkit/batch/main.nf + - modules/nf-core/cnvkit/batch/** + - tests/cnvkit.nf.test + +mutect2: + - conf/modules/mutect2.config + - modules/nf-core/gatk4/mutect2/** + - modules/nf-core/gatk4/genomicsdbimport/** + - modules/nf-core/gatk4/createsomaticpanelofnormals/** + - modules/nf-core/cnvkit/batch/** + - subworkflows/nf-core/bam_create_som_pon_gatk/** - tests/cnvkit.nf.test germlinecnvcaller_cohort: diff --git a/tests/csv/1.0.0/bam.csv b/tests/csv/1.0.0/bam.csv new file mode 100644 index 0000000..b95f604 --- /dev/null +++ b/tests/csv/1.0.0/bam.csv @@ -0,0 +1,3 @@ +sample,bam,bai +sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai +sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai diff --git a/tests/csv/1.0.0/cram.csv b/tests/csv/1.0.0/cram.csv deleted file mode 100644 index e132a68..0000000 --- a/tests/csv/1.0.0/cram.csv +++ /dev/null @@ -1,3 +0,0 @@ -sample,bam -sample1,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam 
-sample2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam diff --git a/tests/pipeline/default.nf.test.snap b/tests/pipeline/default.nf.test.snap index e921a19..4d21008 100644 --- a/tests/pipeline/default.nf.test.snap +++ b/tests/pipeline/default.nf.test.snap @@ -3,16 +3,24 @@ "content": [ "{CNVKIT_BATCH={cnvkit=0.9.10}, Workflow={nf-core/createpanelrefs=v1.0dev}}" ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, "timestamp": "2024-02-21T12:37:23.523857103" }, "cnvkit": { "content": [ - "panel.cnn:md5,07dea67088da689ad04012552c606882", - "test.paired_end.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", - "test.paired_end.sorted.targetcoverage.cnn:md5,ff526714696aa49bdc1dc8d00d965266", - "test2.paired_end.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", - "test2.paired_end.sorted.targetcoverage.cnn:md5,6ae6b3fce7299eedca6133d911c38fe1" + "panel.cnn:md5,68028cd2b4e0fc4489bf5bfd0a73440f", + "test.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,0067cc3a0e479b23ab3bf056cead31b4", + "test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn:md5,203caf8cef6935bb50b4138097955cb8", + "test2.paired_end.recalibrated.sorted.targetcoverage.cnn:md5,1e1012812eb893afd931485cb760294e" ], - "timestamp": "2023-07-08T16:47:57+0000" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, + "timestamp": "2024-04-03T16:55:32.197815" } } \ No newline at end of file diff --git a/tests/pipeline/mutect2.nf.test b/tests/pipeline/mutect2.nf.test new file mode 100644 index 0000000..767e0f9 --- /dev/null +++ b/tests/pipeline/mutect2.nf.test @@ -0,0 +1,26 @@ +nextflow_pipeline { + + name "Test MUTECT2_PON" + script "main.nf" + tag "MUTECT2" + + test("Run MUTECT2 test") { + + when { + params { + outdir = "$outputDir" + tools = 'mutect2' + 
mutect2_pon_name = 'test' + validationSchemaIgnoreParams = 'genomes,baseDir,base-dir,outputDir,output-dir' + } + } + + then { + assertAll( + { assert workflow.success }, + { assert new File("$outputDir/multiqc/").exists() }, + { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") } + ) + } + } +} diff --git a/tests/pipeline/mutect2.nf.test.snap b/tests/pipeline/mutect2.nf.test.snap new file mode 100644 index 0000000..95bd5fc --- /dev/null +++ b/tests/pipeline/mutect2.nf.test.snap @@ -0,0 +1,12 @@ +{ + "software_versions": { + "content": [ + "{GATK4_CREATESOMATICPANELOFNORMALS={gatk4=4.5.0.0}, GATK4_GENOMICSDBIMPORT={gatk4=4.5.0.0}, GATK4_MUTECT2={gatk4=4.5.0.0}, Workflow={nf-core/createpanelrefs=v1.0dev}}" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, + "timestamp": "2024-04-03T17:06:08.093342" + } +} \ No newline at end of file diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 8a76ba9..ac8b2be 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -17,6 +17,7 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_crea include { GENS_PON } from '../subworkflows/local/gens_pon' include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvcaller_cohort' +include { BAM_CREATE_SOM_PON_GATK } from '../subworkflows/nf-core/bam_create_som_pon_gatk' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -27,26 +28,25 @@ include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvc include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' - // Initialize file channels based on params, defined in the params.genomes[params.genome] scope -ch_cnvkit_targets = params.cnvkit_targets ? 
Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) -ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() - : Channel.empty() -ch_exclude_bed = params.exclude_bed ? Channel.fromPath(params.exclude_bed).map { exclude -> [[id:exclude.baseName],exclude]}.collect() - : Channel.value([[:],[]]) -ch_exclude_interval_list = params.exclude_interval_list ? Channel.fromPath(params.exclude_interval_list).map { exclude -> [[id:exclude.baseName],exclude]}.collect() - : Channel.value([[:],[]]) -ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() - : Channel.empty() -ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() - : Channel.empty() -ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() - : Channel.empty() -ch_target_bed = params.target_bed ? Channel.fromPath(params.target_bed).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) -ch_target_interval_list = params.target_interval_list ? Channel.fromPath(params.target_interval_list).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) +ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName], targets]}.collect() + : Channel.value([[id:'null'], []]) +ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName], dict]}.collect() + : Channel.empty() +ch_exclude_bed = params.exclude_bed ? Channel.fromPath(params.exclude_bed).map { exclude -> [[id:exclude.baseName], exclude]}.collect() + : Channel.value([[id:'null'], []]) +ch_exclude_interval_list = params.exclude_interval_list ? 
Channel.fromPath(params.exclude_interval_list).map { exclude -> [[id:exclude.baseName], exclude]}.collect() + : Channel.value([[id:'null'], []]) +ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName], fai]}.collect() + : Channel.empty() +ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName], fasta]}.collect() + : Channel.empty() +ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() + : Channel.empty() +ch_target_bed = params.target_bed ? Channel.fromPath(params.target_bed).map { targets -> [[id:targets.baseName], targets]}.collect() + : Channel.value([[id:'null'], []]) +ch_target_interval_list = params.target_interval_list ? Channel.fromPath(params.target_interval_list).map { targets -> [[id:targets.baseName], targets]}.collect() + : Channel.value([[id:'null'], []]) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -59,7 +59,6 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.mu ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -78,10 +77,7 @@ workflow CREATEPANELREFS { if (params.tools && params.tools.split(',').contains('cnvkit')) { ch_samplesheet - .map{ meta, bam, bai, cram, crai -> - new_meta = meta + [id:"panel"] - [new_meta, bam] - } + .map{ meta, bam, bai, cram, crai -> [meta + [id:'panel'], bam]} .groupTuple() .map {meta, bam -> [ meta, [], bam ]} .set { ch_cnvkit_input } @@ -94,8 +90,8 @@ workflow CREATEPANELREFS { ch_samplesheet .map{meta, bam, bai, cram, crai -> - if (bam) return [ meta + [data_type:"bam"], bam, bai ] - if (cram) return [ meta + [data_type:"cram"], cram, crai ] + if (bam) return [ meta + [data_type:'bam'], bam, bai ] + if (cram) return [ meta + [data_type:'cram'], cram, crai ] } .set { ch_germlinecnvcaller_input } @@ -112,12 +108,30 @@ workflow CREATEPANELREFS { ch_versions = ch_versions.mix(GERMLINECNVCALLER_COHORT.out.versions) } + if (params.tools && params.tools.split(',').contains('mutect2')) { + + ch_mutect2_input = ch_samplesheet.map{meta, bam, bai, cram, crai -> + if (bam) return [ meta + [data_type:'bam'], bam, bai, [] ] + if (cram) return [ meta + [data_type:'cram'], cram, crai, [] ] + } + + BAM_CREATE_SOM_PON_GATK(ch_mutect2_input, + ch_fasta, + ch_fai, + ch_dict, + params.mutect2_pon_name, + ch_target_bed.map{ meta, bed -> [ bed ] }) + + ch_versions = ch_versions.mix(BAM_CREATE_SOM_PON_GATK.out.versions) + + } + if (params.tools && params.tools.split(',').contains('gens')) { ch_samplesheet .map{meta, bam, bai, cram, crai -> - if (bam) return [ meta + [data_type:"bam"], bam, bai ] - if (cram) return [ meta + [data_type:"cram"], cram, crai ] + if (bam) return [ meta + [data_type:'bam'], bam, bai ] + if (cram) return [ meta + [data_type:'cram'], cram, crai ] } .set { ch_gens_input }