diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a0e6ba..f5f7efa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#48](https://github.com/Ferlab-Ste-Justine/Post-processing-Pipeline/pull/48) Publish only main outputs by default - [#49](https://github.com/Ferlab-Ste-Justine/Post-processing-Pipeline/pull/49) Add support for local frequency source - [#49](https://github.com/Ferlab-Ste-Justine/Post-processing-Pipeline/pull/49) Pass java -Xmx option at the command line for exomiser +- [#53](https://github.com/Ferlab-Ste-Justine/Post-processing-Pipeline/pull/53) Replace vep and tabix logic with a standard nf-core subworkflow ### `Fixed` - [#50](https://github.com/Ferlab-Ste-Justine/Post-processing-Pipeline/pull/50) Use container tag 1.20 for splitMultiAllelics process diff --git a/conf/modules.config b/conf/modules.config index 611be0e..b8392e4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -72,7 +72,6 @@ process { withName: ENSEMBLVEP_VEP { container = 'ensemblorg/ensembl-vep:release_111.0' //sticking to v111 for now, but we should update this - publishDir = new_publish_dir([enabled: true]) def args_list = [ "--offline", "--format vcf", @@ -92,12 +91,12 @@ process { ext.prefix = {"variants.${meta.id}.vep"} } - // To publish the vep index file in the same output folder as the vep output - withName: 'vep_tabix' { - publishDir = new_publish_dir([ - enabled: true, - path: { "${params.outdir}/ensemblvep" } - ]) + withName: '.*VCF_ANNOTATE_ENSEMBLVEP.*' { + publishDir = new_publish_dir([enabled: true, path: { "${params.outdir}/ensemblvep" }]) + } + + withName: TABIX_TABIX { + container = 'staphb/htslib:1.20' } withName: 'splitMultiAllelics' { diff --git a/modules.json b/modules.json index 24bdb42..4a574a5 100644 --- a/modules.json +++ b/modules.json @@ -23,7 +23,7 @@ "ensemblvep/vep": { "branch": "master", "git_sha": "6e3585d9ad20b41adc7d271009f8cb5e191ecab4", - 
"installed_by": ["modules"] + "installed_by": ["modules", "vcf_annotate_ensemblvep"] }, "gatk4/genotypegvcfs": { "branch": "master", @@ -34,6 +34,11 @@ "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] + }, + "tabix/tabix": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["vcf_annotate_ensemblvep"] } } }, @@ -53,6 +58,11 @@ "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", "installed_by": ["subworkflows"] + }, + "vcf_annotate_ensemblvep": { + "branch": "master", + "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", + "installed_by": ["subworkflows"] } } } diff --git a/modules/local/tabix.nf b/modules/local/tabix.nf deleted file mode 100644 index befcc88..0000000 --- a/modules/local/tabix.nf +++ /dev/null @@ -1,23 +0,0 @@ -// This module does not follow nf-core standards. We plan to fix or replace it with an nf-core module in the future. -process tabix { - label 'tiny' - - input: - tuple val(meta), path(vcfFile) - - output: - tuple val(meta), path("*.tbi") - - script: - def args = task.ext.args ?: '' - - """ - tabix \\ - $vcfFile \\ - $args - """ - stub: - """ - touch ${vcfFile}.tbi - """ -} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/environment.yml b/modules/nf-core/tabix/tabix/environment.yml new file mode 100644 index 0000000..017c259 --- /dev/null +++ b/modules/nf-core/tabix/tabix/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.20 + - bioconda::tabix=1.11 diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf new file mode 100644 index 0000000..13acd67 --- /dev/null +++ b/modules/nf-core/tabix/tabix/main.nf @@ -0,0 +1,45 @@ +process TABIX_TABIX { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/htslib:1.20--h5efdd21_2' : + 'biocontainers/htslib:1.20--h5efdd21_2' }" + + input: + tuple val(meta), path(tab) + + output: + tuple val(meta), path("*.tbi"), optional:true, emit: tbi + tuple val(meta), path("*.csi"), optional:true, emit: csi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + tabix \\ + --threads $task.cpus \\ + $args \\ + $tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${tab}.tbi + touch ${tab}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml new file mode 100644 index 0000000..7864832 --- /dev/null +++ b/modules/nf-core/tabix/tabix/meta.yml @@ -0,0 +1,58 @@ +name: tabix_tabix +description: create tabix index from a sorted bgzip tab-delimited genome file +keywords: + - index + - tabix + - vcf +tools: + - tabix: + description: Generic indexer for TAB-delimited genome position files. + homepage: https://www.htslib.org/doc/tabix.html + documentation: https://www.htslib.org/doc/tabix.1.html + doi: 10.1093/bioinformatics/btq671 + licence: ["MIT"] + identifier: biotools:tabix +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tab: + type: file + description: TAB-delimited genome position file compressed with bgzip + pattern: "*.{bed.gz,gff.gz,sam.gz,vcf.gz}" +output: + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.tbi": + type: file + description: tabix index file + pattern: "*.{tbi}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: coordinate sorted index file + pattern: "*.{csi}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test new file mode 100644 index 0000000..102b0d7 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -0,0 +1,136 @@ +nextflow_process { + + name "Test Process TABIX_TABIX" + script "modules/nf-core/tabix/tabix/main.nf" + process "TABIX_TABIX" + + tag "modules" + tag "modules_nfcore" + tag "tabix" + tag "tabix/tabix" + + test("sarscov2_bedgz_tbi") { + config "./tabix_bed.config" + when { + process { + """ + input[0] = [ + [ id:'tbi_bed' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out, + file(process.out.tbi[0][1]).name + ).match() } + ) + } + } + + test("sarscov2_gff_tbi") { + config "./tabix_gff.config" + when { + process { + """ + input[0] = [ + [ id:'tbi_gff' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out, + file(process.out.tbi[0][1]).name).match() } + ) + } + + } + + test("sarscov2_vcf_tbi") { + config "./tabix_vcf_tbi.config" + when { + process { + """ + input[0] = [ + [ id:'tbi_vcf' ], + [ 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out, + file(process.out.tbi[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2_vcf_csi") { + config "./tabix_vcf_csi.config" + when { + process { + """ + input[0] = [ + [ id:'vcf_csi' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out, + file(process.out.csi[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2_vcf_csi_stub") { + config "./tabix_vcf_csi.config" + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'vcf_csi_stub' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out, + file(process.out.csi[0][1]).name + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap new file mode 100644 index 0000000..c2b9ed0 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap @@ -0,0 +1,212 @@ +{ + "sarscov2_gff_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_gff" + }, + "genome.gff3.gz.tbi:md5,f79a67d95a98076e04fbe0455d825926" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "tbi_gff" + }, + "genome.gff3.gz.tbi:md5,f79a67d95a98076e04fbe0455d825926" + ] + ], + "versions": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ] + }, + "genome.gff3.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:06:25.653807564" + }, + 
"sarscov2_bedgz_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_bed" + }, + "test.bed.gz.tbi:md5,9a761d51cc81835fd1199201fdbcdd5d" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "tbi_bed" + }, + "test.bed.gz.tbi:md5,9a761d51cc81835fd1199201fdbcdd5d" + ] + ], + "versions": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ] + }, + "test.bed.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:06:09.754082161" + }, + "sarscov2_vcf_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_vcf" + }, + "test.vcf.gz.tbi:md5,d22e5b84e4fcd18792179f72e6da702e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "tbi_vcf" + }, + "test.vcf.gz.tbi:md5,d22e5b84e4fcd18792179f72e6da702e" + ] + ], + "versions": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ] + }, + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:06:40.042648294" + }, + "sarscov2_vcf_csi_stub": { + "content": [ + { + "0": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ], + "csi": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ] + }, + "test.vcf.gz.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:07:08.700367261" + }, + "sarscov2_vcf_csi": { + "content": [ + { + "0": [ + + ], + 
"1": [ + [ + { + "id": "vcf_csi" + }, + "test.vcf.gz.csi:md5,04b41c1efd9ab3c6b1e008a286e27d2b" + ] + ], + "2": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ], + "csi": [ + [ + { + "id": "vcf_csi" + }, + "test.vcf.gz.csi:md5,04b41c1efd9ab3c6b1e008a286e27d2b" + ] + ], + "tbi": [ + + ], + "versions": [ + "versions.yml:md5,07064637fb8a217174052be8e40234e2" + ] + }, + "test.vcf.gz.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-19T12:06:55.362067748" + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tabix_bed.config b/modules/nf-core/tabix/tabix/tests/tabix_bed.config new file mode 100644 index 0000000..7ff0590 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_bed.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p bed' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tabix_gff.config b/modules/nf-core/tabix/tabix/tests/tabix_gff.config new file mode 100644 index 0000000..20c0a1e --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_gff.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p gff' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tabix_vcf_csi.config b/modules/nf-core/tabix/tabix/tests/tabix_vcf_csi.config new file mode 100644 index 0000000..eb4f2d7 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_vcf_csi.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p vcf --csi' + } +} diff --git a/modules/nf-core/tabix/tabix/tests/tabix_vcf_tbi.config b/modules/nf-core/tabix/tabix/tests/tabix_vcf_tbi.config new file mode 100644 index 0000000..2774c8a --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_vcf_tbi.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p vcf' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tags.yml 
b/modules/nf-core/tabix/tabix/tests/tags.yml new file mode 100644 index 0000000..6eda065 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tags.yml @@ -0,0 +1,2 @@ +tabix/tabix: + - "modules/nf-core/tabix/tabix/**" diff --git a/nextflow.config b/nextflow.config index c8cde79..5edd92e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -228,9 +228,6 @@ process { withName: 'variantRecalibratorSNP|variantRecalibratorIndel|applyVQSRIndel|applyVQSRSNP|gatherVCF' { container = 'broadinstitute/gatk:4.5.0.0' } - withName: tabix { - container = 'staphb/htslib:1.19' - } //see conf/base.config for the performance options of defined by nf-core standards withName: 'BCFTOOLS_FILTER|BCFTOOLS_NORM|BCFTOOLS_VIEW' { errorStrategy = 'retry' @@ -280,7 +277,7 @@ process { disk = { check_max( 30.GB * task.attempt, 'disk' ) } time = { check_max( 10.h * task.attempt, 'time' ) } } - withName: 'tabix' { + withName: 'TABIX_TABIX' { errorStrategy = 'retry' maxRetries = 2 cpus = { check_max( 2 * task.attempt, 'cpus' ) } diff --git a/subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf b/subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf new file mode 100644 index 0000000..291eddc --- /dev/null +++ b/subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf @@ -0,0 +1,45 @@ +// +// Run VEP to annotate VCF files +// + +include { ENSEMBLVEP_VEP } from '../../../modules/nf-core/ensemblvep/vep/main' +include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' + +workflow VCF_ANNOTATE_ENSEMBLVEP { + take: + ch_vcf // channel: [ val(meta), path(vcf), [path(custom_file1), path(custom_file2)... (optionnal)]] + ch_fasta // channel: [ val(meta2), path(fasta) ] (optional) + val_genome // value: genome to use + val_species // value: species to use + val_cache_version // value: cache version to use + ch_cache // channel: [ val(meta3), path(cache) ] (optional) + ch_extra_files // channel: [ path(file1), path(file2)... 
] (optional) + + main: + ch_versions = Channel.empty() + + ENSEMBLVEP_VEP( + ch_vcf, + val_genome, + val_species, + val_cache_version, + ch_cache, + ch_fasta, + ch_extra_files + ) + + TABIX_TABIX(ENSEMBLVEP_VEP.out.vcf) + + ch_vcf_tbi = ENSEMBLVEP_VEP.out.vcf.join(TABIX_TABIX.out.tbi, failOnDuplicate: true, failOnMismatch: true) + + // Gather versions of all tools used + ch_versions = ch_versions.mix(ENSEMBLVEP_VEP.out.versions) + ch_versions = ch_versions.mix(TABIX_TABIX.out.versions) + + emit: + vcf_tbi = ch_vcf_tbi // channel: [ val(meta), path(vcf), path(tbi) ] + json = ENSEMBLVEP_VEP.out.json // channel: [ val(meta), path(json) ] + tab = ENSEMBLVEP_VEP.out.tab // channel: [ val(meta), path(tab) ] + reports = ENSEMBLVEP_VEP.out.report // channel: [ path(html) ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/vcf_annotate_ensemblvep/meta.yml b/subworkflows/nf-core/vcf_annotate_ensemblvep/meta.yml new file mode 100644 index 0000000..15d42da --- /dev/null +++ b/subworkflows/nf-core/vcf_annotate_ensemblvep/meta.yml @@ -0,0 +1,65 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: vcf_annotate_ensemblvep +description: Perform annotation with ensemblvep and bgzip + tabix index the resulting VCF file +keywords: + - vcf + - annotation + - ensemblvep +components: + - ensemblvep/vep + - tabix/tabix +input: + - ch_vcf: + description: | + vcf file to annotate + Structure: [ val(meta), path(vcf), [path(custom_file1), path(custom_file2)... 
(optionnal)] ] + - ch_fasta: + description: | + Reference genome fasta file (optional) + Structure: [ val(meta2), path(fasta) ] + - val_genome: + type: string + description: genome to use + - val_species: + type: string + description: species to use + - val_cache_version: + type: integer + description: cache version to use + - ch_cache: + description: | + the root cache folder for ensemblvep (optional) + Structure: [ val(meta3), path(cache) ] + - ch_extra_files: + description: | + any extra files needed by plugins for ensemblvep (optional) + Structure: [ path(file1), path(file2)... ] +output: + - vcf_tbi: + description: | + Compressed vcf file + tabix index + Structure: [ val(meta), path(vcf), path(tbi) ] + - json: + description: | + json file + Structure: [ val(meta), path(json) ] + - tab: + description: | + tab file + Structure: [ val(meta), path(tab) ] + - reports: + type: file + description: html reports + pattern: "*.html" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" + - "@matthdsm" + - "@nvnieuwk" +maintainers: + - "@maxulysse" + - "@matthdsm" + - "@nvnieuwk" diff --git a/workflows/postprocessing.nf b/workflows/postprocessing.nf index 2579cb6..c621f9a 100644 --- a/workflows/postprocessing.nf +++ b/workflows/postprocessing.nf @@ -5,23 +5,22 @@ */ //modules and subworkflows -include { paramsSummaryMap } from 'plugin/nf-validation' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { EXCLUDE_MNPS } from "../subworkflows/local/exclude_mnps" -include { VQSR } from "../subworkflows/local/vqsr" -include { BCFTOOLS_VIEW } from '../modules/nf-core/bcftools/view/main' -include { EXOMISER } from '../modules/local/exomiser' -include { splitMultiAllelics } from '../modules/local/split_multi_allelics' -include { ENSEMBLVEP_VEP } from 
'../modules/nf-core/ensemblvep/vep/main' -include { tabix as vep_tabix } from '../modules/local/tabix' -include { COMBINEGVCFS } from '../modules/local/combine_gvcfs' -include { GATK4_GENOTYPEGVCFS } from '../modules/nf-core/gatk4/genotypegvcfs' -include { GATK4_VARIANTFILTRATION} from '../modules/nf-core/gatk4/variantfiltration' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { EXCLUDE_MNPS } from "../subworkflows/local/exclude_mnps" +include { VQSR } from "../subworkflows/local/vqsr" +include { BCFTOOLS_VIEW } from '../modules/nf-core/bcftools/view/main' +include { EXOMISER } from '../modules/local/exomiser' +include { splitMultiAllelics } from '../modules/local/split_multi_allelics' +include { VCF_ANNOTATE_ENSEMBLVEP } from '../subworkflows/nf-core/vcf_annotate_ensemblvep/main' +include { COMBINEGVCFS } from '../modules/local/combine_gvcfs' +include { GATK4_GENOTYPEGVCFS } from '../modules/nf-core/gatk4/genotypegvcfs' +include { GATK4_VARIANTFILTRATION } from '../modules/nf-core/gatk4/variantfiltration' //functions -include { isExomiserToolIncluded } from '../subworkflows/local/utils_nfcore_postprocessing_pipeline/utils' -include { isVepToolIncluded } from '../subworkflows/local/utils_nfcore_postprocessing_pipeline/utils' +include { isExomiserToolIncluded } from '../subworkflows/local/utils_nfcore_postprocessing_pipeline/utils' +include { isVepToolIncluded } from '../subworkflows/local/utils_nfcore_postprocessing_pipeline/utils' def HOMO_SAPIENS_SPECIES = "homo_sapiens" @@ -94,22 +93,22 @@ def exomiser(inputChannel, } def vep(input_channel, vep_genome, vep_species, path_fasta, vep_cache, vep_cache_version) { - + def ch_input_for_vep = input_channel.map{meta, files -> def vcf_file = files.find { it.name.endsWith("vcf.gz") } def custom_extra_files = [] [meta, vcf_file, 
custom_extra_files] } - - return ENSEMBLVEP_VEP( - ch_input_for_vep, + + return VCF_ANNOTATE_ENSEMBLVEP( + ch_input_for_vep, // meta, vcf, optional_custom_files + [[:], path_fasta], // meta2, fasta vep_genome, vep_species, vep_cache_version, vep_cache, - [[:], path_fasta], // meta2, fasta [] //extra files - ) + ).vcf_tbi } process writemeta{ @@ -224,10 +223,10 @@ workflow POSTPROCESSING { //tag variants that are probable artifacts def ch_output_from_tagArtifacts = tagArtifacts(ch_output_from_genotypegvcf, params.hardFilters,pathReferenceGenomeFasta,pathReferenceGenomeFai,pathReferenceDict) - //tag frequent mutations in the population + //normalize variants def ch_output_from_splitMultiAllelics = splitMultiAllelics(ch_output_from_tagArtifacts, referenceGenome) - //Annotating mutations + //Annotating variants with VEP if (isVepToolIncluded()) { def vep_cache = file(params.vep_cache) @@ -239,7 +238,6 @@ workflow POSTPROCESSING { vep_cache, params.vep_cache_version ) - vep_tabix(ch_output_from_vep.vcf) } if (isExomiserToolIncluded()) {