diff --git a/CHANGELOG.md b/CHANGELOG.md index 0767506..0db15cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.4.1-dev - [date] +## Unreleased + +### `Added` +- [#61](https://github.com/Ferlab-Ste-Justine/Post-processing-Pipeline/pull/61) Add download VEP module from nf-core + +### `Changed` +- [#59](https://github.com/Ferlab-Ste-Justine/Post-processing-Pipeline/pull/59) Add ".snv" to VEP output filename prefix + +## v2.4.1-dev - 16/01/2025 ## v2.4.0-dev @@ -26,7 +34,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` - [#54](https://github.com/Ferlab-Ste-Justine/Post-processing-Pipeline/pull/54) Standardize exomiser output filenames -- [#59](https://github.com/Ferlab-Ste-Justine/Post-processing-Pipeline/pull/59) Add ".snv" to VEP output filename prefix ### `Fixed` - [#50](https://github.com/Ferlab-Ste-Justine/Post-processing-Pipeline/pull/50) Use container tag 1.20 for splitMultiAllelics process diff --git a/README.md b/README.md index 805dd21..e1c98d8 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ It performs joint genotyping, tags low-quality variants, and optionally annotate 6. Tag false positive variants with either: - For whole genome sequencing data: [Variant quality score recalibration (VQSR)](https://gatk.broadinstitute.org/hc/en-us/articles/360036510892-VariantRecalibrator) - For whole exome sequencing data: [Hard-Filtering](https://gatk.broadinstitute.org/hc/en-us/articles/360036733451-VariantFiltration) -7. Optionnally annotate variants with [Variant effect predictor (VEP)](https://useast.ensembl.org/info/docs/tools/vep/index.html) +7. Optionnally annotate variants with [Variant effect predictor (VEP)](https://useast.ensembl.org/info/docs/tools/vep/index.html) and download the VEP reference cache (if `download_cache` is enabled) 8. 
Optionnally integrate phenotype data to annotate, filter and prioritise variants likely to be disease-causing with [exomiser](https://www.sanger.ac.uk/tool/exomiser/) @@ -76,7 +76,7 @@ See [docs/output.md](docs/output.md) for more details about pipeline outputs. ## Credits -Ferlab-Ste-Justine/Post-processing-Pipeline was originally written by Damien Geneste, David Morais, Felix-Antoine Le Sieur, Jeremy Costanza, Lysiane Bouchard. +Ferlab-Ste-Justine/Post-processing-Pipeline was originally written by Damien Geneste, David Morais, Felix-Antoine Le Sieur, Jeremy Costanza, Lysiane Bouchard, Georgette Femerling. ## Contributions and Support diff --git a/conf/modules.config b/conf/modules.config index 81fa961..72b8022 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -71,6 +71,18 @@ process { ext.args = { "--output-filename=${meta.id}.exomiser" } } + withName: ENSEMBLVEP_DOWNLOAD { + // Using the conda container as the official one from ensemblorg does not have the vep_install command used in nf-core. + container = 'quay.io/biocontainers/ensembl-vep:111.0--pl5321h2a3209d_0' + ext.when = { params.tools && (params.tools.split(',').contains('vep')) } + ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE' + ext.prefix = { "${params.vep_cache_version}_${params.vep_genome}" } + publishDir = [ + mode: params.publish_dir_mode, + path: { params.outdir_cache ? "${params.outdir_cache}/": "${params.outdir}/cache/" } + ] + } + withName: ENSEMBLVEP_VEP { container = 'ensemblorg/ensembl-vep:release_111.0' //sticking to v111 for now, but we should update this def args_list = [ diff --git a/docs/reference_data.md b/docs/reference_data.md index f4ab74f..4e4765e 100644 --- a/docs/reference_data.md +++ b/docs/reference_data.md @@ -46,7 +46,7 @@ For more details, see [Gatk documentation](https://gatk.broadinstitute.org/hc/en ## VEP Cache Directory -The `vepCache` parameter specifies the directory for the vep cache. 
It is only required if `vep` is specified via the +The `vep_cache` parameter specifies the directory for the vep cache. It is only required if `vep` is specified via the `tools` parameter. The vep cache is not automatically populated by the pipeline. It must be pre-downloaded. You can obtain a copy of the diff --git a/docs/usage.md b/docs/usage.md index 5abc755..86ae066 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -194,7 +194,11 @@ Parameters summary | `broad` | _Optional_ | Path to the directory containing Broad reference data (for VQSR) | | `intervalsFile` | _Optional_ | Path to the file containg the genome intervals list on which to operate | | `tools` | _Optional_ | Additional tools to run separated by commas. Supported tools are `vep` and `exomiser` | -| `vepCache` | _Optional_ | Path to the vep cache data directory | +| `vep_cache` | _Optional_ | Path to the vep cache data directory | +| `vep_cache_version` | _Optional_ | Version of the vep cache. e.g. `111` | +| `vep_genome` | _Optional_ | Genome assembly version of the vep cache | +| `download_cache` | _Optional_ | Download vep cache (default: false) | +| `outdir_cache` | _Optional_ | Path to write the cache to. If not declared, cache will be written to `${outdir}/cache/` | | `exclude_mnps` | _Optional_ | Replace MNPs by individual SNPs (default: true). Must be true on whole genome data. 
| | `exomiser_data_dir` | _Optional_ | Path to the exomiser reference data directory | | `exomiser_genome` | _Optional_ | Genome assembly version to be used by exomiser(`hg19` or `hg38`) | diff --git a/modules.json b/modules.json index 4a574a5..48298c5 100644 --- a/modules.json +++ b/modules.json @@ -20,6 +20,11 @@ "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, + "ensemblvep/download": { + "branch": "master", + "git_sha": "6e3585d9ad20b41adc7d271009f8cb5e191ecab4", + "installed_by": ["modules"] + }, "ensemblvep/vep": { "branch": "master", "git_sha": "6e3585d9ad20b41adc7d271009f8cb5e191ecab4", diff --git a/modules/nf-core/ensemblvep/download/environment.yml b/modules/nf-core/ensemblvep/download/environment.yml new file mode 100644 index 0000000..3d36eb1 --- /dev/null +++ b/modules/nf-core/ensemblvep/download/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::ensembl-vep=113.0 diff --git a/modules/nf-core/ensemblvep/download/main.nf b/modules/nf-core/ensemblvep/download/main.nf new file mode 100644 index 0000000..0664a2d --- /dev/null +++ b/modules/nf-core/ensemblvep/download/main.nf @@ -0,0 +1,47 @@ +process ENSEMBLVEP_DOWNLOAD { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ensembl-vep:113.0--pl5321h2a3209d_0' : + 'biocontainers/ensembl-vep:113.0--pl5321h2a3209d_0' }" + + input: + tuple val(meta), val(assembly), val(species), val(cache_version) + + output: + tuple val(meta), path(prefix), emit: cache + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: 'vep_cache' + """ + vep_install \\ + --CACHEDIR $prefix \\ + --SPECIES $species \\ + --ASSEMBLY $assembly \\ + --CACHE_VERSION $cache_version \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: 'vep_cache' + """ + mkdir $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ensemblvep/download/meta.yml b/modules/nf-core/ensemblvep/download/meta.yml new file mode 100644 index 0000000..8da9621 --- /dev/null +++ b/modules/nf-core/ensemblvep/download/meta.yml @@ -0,0 +1,53 @@ +name: ensemblvep_download +description: Ensembl Variant Effect Predictor (VEP). The cache downloading options + are controlled through `task.ext.args`. +keywords: + - annotation + - cache + - download +tools: + - ensemblvep: + description: | + VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs + or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions. + homepage: https://www.ensembl.org/info/docs/tools/vep/index.html + documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html + licence: ["Apache-2.0"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - assembly: + type: string + description: | + Genome assembly + - species: + type: string + description: | + Specie + - cache_version: + type: string + description: | + cache version +output: + - cache: + - meta: + type: file + description: cache + pattern: "*" + - prefix: + type: file + description: cache + pattern: "*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/ensemblvep/download/tests/main.nf.test b/modules/nf-core/ensemblvep/download/tests/main.nf.test new file mode 100644 index 0000000..a558599 --- /dev/null +++ b/modules/nf-core/ensemblvep/download/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process ENSEMBLVEP_DOWNLOAD" + script "../main.nf" + process "ENSEMBLVEP_DOWNLOAD" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "ensemblvep" + tag "ensemblvep/download" + + test("celegans - download") { + + when { + process { + """ + input[0] = Channel.of([ + [id:"113_WBcel235"], + params.vep_genome, + params.vep_species, + params.vep_cache_version + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("celegans - download - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [id:"113_WBcel235"], + params.vep_genome, + params.vep_species, + params.vep_cache_version + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/ensemblvep/download/tests/main.nf.test.snap b/modules/nf-core/ensemblvep/download/tests/main.nf.test.snap new file mode 100644 index 0000000..706bd28 --- /dev/null +++ b/modules/nf-core/ensemblvep/download/tests/main.nf.test.snap @@ -0,0 +1,322 @@ +{ + "celegans - download": { + 
"content": [ + { + "0": [ + [ + { + "id": "113_WBcel235" + }, + [ + [ + [ + [ + "1-1000000.gz:md5,cadcba92b0999210dd8d832505d2e4c4", + "10000001-11000000.gz:md5,998a75dd927d10d45f8eebeef5fc7a75", + "1000001-2000000.gz:md5,a5cb3adb1ec9f40eed6a355d1492ba9b", + "11000001-12000000.gz:md5,46e6917f51093e28cce061774b9ed158", + "12000001-13000000.gz:md5,0adffacf8482d6c224df27104f65c9d6", + "13000001-14000000.gz:md5,aee759d812fc900a980ab0c4c5bd0273", + "14000001-15000000.gz:md5,f65537a3f76c40e63b6deb0b6cdb09dc", + "15000001-16000000.gz:md5,379f092ad1afa888da1fc13e80535def", + "2000001-3000000.gz:md5,86839741524579fd089498d6bee44dff", + "3000001-4000000.gz:md5,509b28af3920427e951f00b6973b5df4", + "4000001-5000000.gz:md5,f606e69cf59b0bdf2b61653608d955a6", + "5000001-6000000.gz:md5,a14ce1e21856e4a77ed63c67cbdfb26a", + "6000001-7000000.gz:md5,e1a895d6e8b352182b53ed1d0ce6e24e", + "7000001-8000000.gz:md5,ddf91b60f636d26b68b6bab3520b6b32", + "8000001-9000000.gz:md5,57482b996f89e92bbd0196efa4915cd3", + "9000001-10000000.gz:md5,43b5d89f84236b49b384d7f37f928129" + ], + [ + "1-1000000.gz:md5,d18811781848f70baef0b0348190d7ce", + "10000001-11000000.gz:md5,19011165abc56233ea0c5b0e6938d9c9", + "1000001-2000000.gz:md5,5e720fa191f3c9ac799b6a071bcc4332", + "11000001-12000000.gz:md5,b19c46fb00ca13a2a31128bd1829ddf5", + "12000001-13000000.gz:md5,54354b0870ca96641c51ed63382da007", + "13000001-14000000.gz:md5,6954fdc223f58eb406e602752ab7d139", + "14000001-15000000.gz:md5,929275a1cfea883999dddc20931a2e72", + "15000001-16000000.gz:md5,5f5b783a589a1fd80cc565e6f339c540", + "2000001-3000000.gz:md5,54e476e0e9f4a5d973ee710fd824abc7", + "3000001-4000000.gz:md5,d78d4a63165429fdb3a61b7cdbd3c43a", + "4000001-5000000.gz:md5,983f8efcebb7f62d7e7b1b3c0573d43e", + "5000001-6000000.gz:md5,e2cd03ed5b67b8ee123e4c4958508fe4", + "6000001-7000000.gz:md5,d04bc9335ba39ace20bce936e3a5cdeb", + "7000001-8000000.gz:md5,9354b26a9ba94aa5bc30f537c22382fb", + "8000001-9000000.gz:md5,b227c6ef81ab72d211d25dc4f44813b9", + 
"9000001-10000000.gz:md5,a6d7f29edd7c22139403a11cac989b7a" + ], + [ + "1-1000000.gz:md5,2117acb322a117a9c5db85c072575331", + "10000001-11000000.gz:md5,646c9582b56eb12ddbb1dd35b25c3670", + "1000001-2000000.gz:md5,ee433e4e5e37b2d008c43e1af4be0f8d", + "11000001-12000000.gz:md5,962fd6e52046484b3b123f9380ed64e9", + "12000001-13000000.gz:md5,1abf2d695c829eb2c88e0d3dbc739a1c", + "13000001-14000000.gz:md5,a6e03bf867f5cc694174a230f1b13a6b", + "2000001-3000000.gz:md5,a5b250aa9e3ee8cecc23bea0e2fa19a1", + "3000001-4000000.gz:md5,1390a6d2a28a4861b282d36d0fb85660", + "4000001-5000000.gz:md5,4bc7106bb2661aea28613c31935a5c8f", + "5000001-6000000.gz:md5,7317d6fbb3c77d7cdd31e781afab8f7d", + "6000001-7000000.gz:md5,1a3b6fa586e570c16b4833e34b28751e", + "7000001-8000000.gz:md5,b7bcb06393682f621403afdf19bf87b4", + "8000001-9000000.gz:md5,0011675a8567d394da54a52480b35786", + "9000001-10000000.gz:md5,e4fa88e4ec57ed0c71fd21090d8aa17a" + ], + [ + "1-1000000.gz:md5,a47af22d33275652036ddf7161699c7c", + "10000001-11000000.gz:md5,7fc129e7edbaa5be87306de417c2ef28", + "1000001-2000000.gz:md5,cbc12c339741df5ad06bf9a946be6c93", + "11000001-12000000.gz:md5,d1cc5e20e3d3402debdc102087a5407f", + "12000001-13000000.gz:md5,42c69c8e86d28151e9a8b1787dbee125", + "13000001-14000000.gz:md5,c7459d1789a833e8a898ebdbc607e7d8", + "14000001-15000000.gz:md5,5806b20108f56d9eeabcdd4f8450dca3", + "15000001-16000000.gz:md5,78e859f70026a05be43d48b9b272f287", + "16000001-17000000.gz:md5,539db7fc976bee4b6031f8dcb6a4641d", + "17000001-18000000.gz:md5,f3ea55e7552dc36734d6e8ba67d1e4c2", + "2000001-3000000.gz:md5,539013ecfdcd06eb653445f857265322", + "3000001-4000000.gz:md5,beb9701b402bd5ddc46a4da6e531f783", + "4000001-5000000.gz:md5,3f46efb2635850cc6c3d8ae51727a400", + "5000001-6000000.gz:md5,e11549bca12c5e2a7a208a997fda1c68", + "6000001-7000000.gz:md5,c0f3546c6859dc1a5fe9ff7f015ecd7e", + "7000001-8000000.gz:md5,344b72822f647819f4ee6b5afa9d7701", + "8000001-9000000.gz:md5,1c06d285ff5c53f89f073212343902b7", + 
"9000001-10000000.gz:md5,79140e754039c6d6fc6eeecddcf2aa8e" + ], + [ + "1-1000000.gz:md5,40ef48190d3269cd4112450bc717b1ef" + ], + [ + "1-1000000.gz:md5,1a8739457c429931923ed77596a9ee54", + "10000001-11000000.gz:md5,316fa1d06fc1878b6a5995f4aee3e49d", + "1000001-2000000.gz:md5,3926c03a091850c909bd0ccfc7133c0b", + "11000001-12000000.gz:md5,29ca11d2f05051cc439a0d24a9db134c", + "12000001-13000000.gz:md5,a46f648554e91999652019516c933754", + "13000001-14000000.gz:md5,167b126d1c690a0e7e25fc5ccd09fb7c", + "14000001-15000000.gz:md5,645554c896133c476c3083302371bcf8", + "15000001-16000000.gz:md5,60fc48d9a7aff6286fc6630c46bcfebc", + "16000001-17000000.gz:md5,07e1750d1c95a61e96774d2cf3da4d89", + "17000001-18000000.gz:md5,59d084309f6a975ec1066a828b5845ba", + "18000001-19000000.gz:md5,868c12d305dbd4d04399ec7848804328", + "19000001-20000000.gz:md5,6de03a00061f6a88dcbbb8ed5fc0b8dc", + "20000001-21000000.gz:md5,732b956f13da9ef01f9de3355d12e28b", + "2000001-3000000.gz:md5,7c7528266c523cad419ea25e75d9566e", + "3000001-4000000.gz:md5,bb2283c0cfb0e4601fc535a4d51e6f2d", + "4000001-5000000.gz:md5,64c7f28f554414a88c886b0bcadb3c39", + "5000001-6000000.gz:md5,58e7106fe577a8b5e5c698445b4f0c33", + "6000001-7000000.gz:md5,2e309d12cf1c1c6276585f457ceeacc2", + "7000001-8000000.gz:md5,08cb0600f7806608f0103187a6c9c64e", + "8000001-9000000.gz:md5,869333c2615f714860d17d794640d4ad", + "9000001-10000000.gz:md5,b85cc861c6a3b30cf6f06c8af136b383" + ], + [ + "1-1000000.gz:md5,c8d97b084c159c3cb5be1fff4637dfce", + "10000001-11000000.gz:md5,f441f2af06fd4973749dfbfbef40fe1b", + "1000001-2000000.gz:md5,c42a1526a836cfacefb67e9217f648aa", + "11000001-12000000.gz:md5,264421c249c696b45c92e2611285fee7", + "12000001-13000000.gz:md5,e673d1fdbe7dc0d09bea3d11a5797d6a", + "13000001-14000000.gz:md5,88f4f84e63b362f1b4f800c48b37e82c", + "14000001-15000000.gz:md5,26282f2b305ed82fb9f8875e97361105", + "15000001-16000000.gz:md5,30b9132c2610d42919ba231d1adbef2a", + "16000001-17000000.gz:md5,3d0e975ccd1ae4e92bf1d9d915ed293f", + 
"17000001-18000000.gz:md5,7db5b3819da3df1e47fe757dc9c6f2ba", + "2000001-3000000.gz:md5,55f6130a8d5872bdc9f8eed231ad0f65", + "3000001-4000000.gz:md5,402b826dbf6993c207ad15483a44182b", + "4000001-5000000.gz:md5,43cf926d43db25af5724fb5077edfee1", + "5000001-6000000.gz:md5,f40276dbea3f6f9a75f9301d1253eb09", + "6000001-7000000.gz:md5,df0d2d38060d4e7c606072ae814b1f38", + "7000001-8000000.gz:md5,c4117cc51255c0a91c51ff43403f00f7", + "8000001-9000000.gz:md5,59a4ebadca27041634c58652c544c8dd", + "9000001-10000000.gz:md5,c54510616273a4d1bfa9d525dbbbca40" + ], + "chr_synonyms.txt:md5,d390f0bcc6fec9786bc66b75f2d4390b", + "info.txt:md5,249c88c7a71464e048cca0c4b2a21198" + ] + ] + ] + ] + ], + "1": [ + "versions.yml:md5,25f0fd61e1a90ecec5427a9400ad6bc9" + ], + "cache": [ + [ + { + "id": "113_WBcel235" + }, + [ + [ + [ + [ + "1-1000000.gz:md5,cadcba92b0999210dd8d832505d2e4c4", + "10000001-11000000.gz:md5,998a75dd927d10d45f8eebeef5fc7a75", + "1000001-2000000.gz:md5,a5cb3adb1ec9f40eed6a355d1492ba9b", + "11000001-12000000.gz:md5,46e6917f51093e28cce061774b9ed158", + "12000001-13000000.gz:md5,0adffacf8482d6c224df27104f65c9d6", + "13000001-14000000.gz:md5,aee759d812fc900a980ab0c4c5bd0273", + "14000001-15000000.gz:md5,f65537a3f76c40e63b6deb0b6cdb09dc", + "15000001-16000000.gz:md5,379f092ad1afa888da1fc13e80535def", + "2000001-3000000.gz:md5,86839741524579fd089498d6bee44dff", + "3000001-4000000.gz:md5,509b28af3920427e951f00b6973b5df4", + "4000001-5000000.gz:md5,f606e69cf59b0bdf2b61653608d955a6", + "5000001-6000000.gz:md5,a14ce1e21856e4a77ed63c67cbdfb26a", + "6000001-7000000.gz:md5,e1a895d6e8b352182b53ed1d0ce6e24e", + "7000001-8000000.gz:md5,ddf91b60f636d26b68b6bab3520b6b32", + "8000001-9000000.gz:md5,57482b996f89e92bbd0196efa4915cd3", + "9000001-10000000.gz:md5,43b5d89f84236b49b384d7f37f928129" + ], + [ + "1-1000000.gz:md5,d18811781848f70baef0b0348190d7ce", + "10000001-11000000.gz:md5,19011165abc56233ea0c5b0e6938d9c9", + "1000001-2000000.gz:md5,5e720fa191f3c9ac799b6a071bcc4332", + 
"11000001-12000000.gz:md5,b19c46fb00ca13a2a31128bd1829ddf5", + "12000001-13000000.gz:md5,54354b0870ca96641c51ed63382da007", + "13000001-14000000.gz:md5,6954fdc223f58eb406e602752ab7d139", + "14000001-15000000.gz:md5,929275a1cfea883999dddc20931a2e72", + "15000001-16000000.gz:md5,5f5b783a589a1fd80cc565e6f339c540", + "2000001-3000000.gz:md5,54e476e0e9f4a5d973ee710fd824abc7", + "3000001-4000000.gz:md5,d78d4a63165429fdb3a61b7cdbd3c43a", + "4000001-5000000.gz:md5,983f8efcebb7f62d7e7b1b3c0573d43e", + "5000001-6000000.gz:md5,e2cd03ed5b67b8ee123e4c4958508fe4", + "6000001-7000000.gz:md5,d04bc9335ba39ace20bce936e3a5cdeb", + "7000001-8000000.gz:md5,9354b26a9ba94aa5bc30f537c22382fb", + "8000001-9000000.gz:md5,b227c6ef81ab72d211d25dc4f44813b9", + "9000001-10000000.gz:md5,a6d7f29edd7c22139403a11cac989b7a" + ], + [ + "1-1000000.gz:md5,2117acb322a117a9c5db85c072575331", + "10000001-11000000.gz:md5,646c9582b56eb12ddbb1dd35b25c3670", + "1000001-2000000.gz:md5,ee433e4e5e37b2d008c43e1af4be0f8d", + "11000001-12000000.gz:md5,962fd6e52046484b3b123f9380ed64e9", + "12000001-13000000.gz:md5,1abf2d695c829eb2c88e0d3dbc739a1c", + "13000001-14000000.gz:md5,a6e03bf867f5cc694174a230f1b13a6b", + "2000001-3000000.gz:md5,a5b250aa9e3ee8cecc23bea0e2fa19a1", + "3000001-4000000.gz:md5,1390a6d2a28a4861b282d36d0fb85660", + "4000001-5000000.gz:md5,4bc7106bb2661aea28613c31935a5c8f", + "5000001-6000000.gz:md5,7317d6fbb3c77d7cdd31e781afab8f7d", + "6000001-7000000.gz:md5,1a3b6fa586e570c16b4833e34b28751e", + "7000001-8000000.gz:md5,b7bcb06393682f621403afdf19bf87b4", + "8000001-9000000.gz:md5,0011675a8567d394da54a52480b35786", + "9000001-10000000.gz:md5,e4fa88e4ec57ed0c71fd21090d8aa17a" + ], + [ + "1-1000000.gz:md5,a47af22d33275652036ddf7161699c7c", + "10000001-11000000.gz:md5,7fc129e7edbaa5be87306de417c2ef28", + "1000001-2000000.gz:md5,cbc12c339741df5ad06bf9a946be6c93", + "11000001-12000000.gz:md5,d1cc5e20e3d3402debdc102087a5407f", + "12000001-13000000.gz:md5,42c69c8e86d28151e9a8b1787dbee125", + 
"13000001-14000000.gz:md5,c7459d1789a833e8a898ebdbc607e7d8", + "14000001-15000000.gz:md5,5806b20108f56d9eeabcdd4f8450dca3", + "15000001-16000000.gz:md5,78e859f70026a05be43d48b9b272f287", + "16000001-17000000.gz:md5,539db7fc976bee4b6031f8dcb6a4641d", + "17000001-18000000.gz:md5,f3ea55e7552dc36734d6e8ba67d1e4c2", + "2000001-3000000.gz:md5,539013ecfdcd06eb653445f857265322", + "3000001-4000000.gz:md5,beb9701b402bd5ddc46a4da6e531f783", + "4000001-5000000.gz:md5,3f46efb2635850cc6c3d8ae51727a400", + "5000001-6000000.gz:md5,e11549bca12c5e2a7a208a997fda1c68", + "6000001-7000000.gz:md5,c0f3546c6859dc1a5fe9ff7f015ecd7e", + "7000001-8000000.gz:md5,344b72822f647819f4ee6b5afa9d7701", + "8000001-9000000.gz:md5,1c06d285ff5c53f89f073212343902b7", + "9000001-10000000.gz:md5,79140e754039c6d6fc6eeecddcf2aa8e" + ], + [ + "1-1000000.gz:md5,40ef48190d3269cd4112450bc717b1ef" + ], + [ + "1-1000000.gz:md5,1a8739457c429931923ed77596a9ee54", + "10000001-11000000.gz:md5,316fa1d06fc1878b6a5995f4aee3e49d", + "1000001-2000000.gz:md5,3926c03a091850c909bd0ccfc7133c0b", + "11000001-12000000.gz:md5,29ca11d2f05051cc439a0d24a9db134c", + "12000001-13000000.gz:md5,a46f648554e91999652019516c933754", + "13000001-14000000.gz:md5,167b126d1c690a0e7e25fc5ccd09fb7c", + "14000001-15000000.gz:md5,645554c896133c476c3083302371bcf8", + "15000001-16000000.gz:md5,60fc48d9a7aff6286fc6630c46bcfebc", + "16000001-17000000.gz:md5,07e1750d1c95a61e96774d2cf3da4d89", + "17000001-18000000.gz:md5,59d084309f6a975ec1066a828b5845ba", + "18000001-19000000.gz:md5,868c12d305dbd4d04399ec7848804328", + "19000001-20000000.gz:md5,6de03a00061f6a88dcbbb8ed5fc0b8dc", + "20000001-21000000.gz:md5,732b956f13da9ef01f9de3355d12e28b", + "2000001-3000000.gz:md5,7c7528266c523cad419ea25e75d9566e", + "3000001-4000000.gz:md5,bb2283c0cfb0e4601fc535a4d51e6f2d", + "4000001-5000000.gz:md5,64c7f28f554414a88c886b0bcadb3c39", + "5000001-6000000.gz:md5,58e7106fe577a8b5e5c698445b4f0c33", + "6000001-7000000.gz:md5,2e309d12cf1c1c6276585f457ceeacc2", + 
"7000001-8000000.gz:md5,08cb0600f7806608f0103187a6c9c64e", + "8000001-9000000.gz:md5,869333c2615f714860d17d794640d4ad", + "9000001-10000000.gz:md5,b85cc861c6a3b30cf6f06c8af136b383" + ], + [ + "1-1000000.gz:md5,c8d97b084c159c3cb5be1fff4637dfce", + "10000001-11000000.gz:md5,f441f2af06fd4973749dfbfbef40fe1b", + "1000001-2000000.gz:md5,c42a1526a836cfacefb67e9217f648aa", + "11000001-12000000.gz:md5,264421c249c696b45c92e2611285fee7", + "12000001-13000000.gz:md5,e673d1fdbe7dc0d09bea3d11a5797d6a", + "13000001-14000000.gz:md5,88f4f84e63b362f1b4f800c48b37e82c", + "14000001-15000000.gz:md5,26282f2b305ed82fb9f8875e97361105", + "15000001-16000000.gz:md5,30b9132c2610d42919ba231d1adbef2a", + "16000001-17000000.gz:md5,3d0e975ccd1ae4e92bf1d9d915ed293f", + "17000001-18000000.gz:md5,7db5b3819da3df1e47fe757dc9c6f2ba", + "2000001-3000000.gz:md5,55f6130a8d5872bdc9f8eed231ad0f65", + "3000001-4000000.gz:md5,402b826dbf6993c207ad15483a44182b", + "4000001-5000000.gz:md5,43cf926d43db25af5724fb5077edfee1", + "5000001-6000000.gz:md5,f40276dbea3f6f9a75f9301d1253eb09", + "6000001-7000000.gz:md5,df0d2d38060d4e7c606072ae814b1f38", + "7000001-8000000.gz:md5,c4117cc51255c0a91c51ff43403f00f7", + "8000001-9000000.gz:md5,59a4ebadca27041634c58652c544c8dd", + "9000001-10000000.gz:md5,c54510616273a4d1bfa9d525dbbbca40" + ], + "chr_synonyms.txt:md5,d390f0bcc6fec9786bc66b75f2d4390b", + "info.txt:md5,249c88c7a71464e048cca0c4b2a21198" + ] + ] + ] + ] + ], + "versions": [ + "versions.yml:md5,25f0fd61e1a90ecec5427a9400ad6bc9" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-21T09:09:48.574969389" + }, + "celegans - download - stub": { + "content": [ + { + "0": [ + [ + { + "id": "113_WBcel235" + }, + [ + + ] + ] + ], + "1": [ + "versions.yml:md5,25f0fd61e1a90ecec5427a9400ad6bc9" + ], + "cache": [ + [ + { + "id": "113_WBcel235" + }, + [ + + ] + ] + ], + "versions": [ + "versions.yml:md5,25f0fd61e1a90ecec5427a9400ad6bc9" + ] + } + ], + "meta": { + "nf-test": 
"0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-21T09:10:03.728940123" + } +} \ No newline at end of file diff --git a/modules/nf-core/ensemblvep/download/tests/nextflow.config b/modules/nf-core/ensemblvep/download/tests/nextflow.config new file mode 100644 index 0000000..0a4ae1a --- /dev/null +++ b/modules/nf-core/ensemblvep/download/tests/nextflow.config @@ -0,0 +1,12 @@ +params { + vep_cache_version = "113" + vep_genome = "WBcel235" + vep_species = "caenorhabditis_elegans" +} + +process { + withName: ENSEMBLVEP_DOWNLOAD { + ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE' + ext.prefix = { "${params.vep_cache_version}_${params.vep_genome}" } + } +} diff --git a/modules/nf-core/ensemblvep/download/tests/tags.yml b/modules/nf-core/ensemblvep/download/tests/tags.yml new file mode 100644 index 0000000..26671f3 --- /dev/null +++ b/modules/nf-core/ensemblvep/download/tests/tags.yml @@ -0,0 +1,2 @@ +ensemblvep/download: + - "modules/nf-core/ensemblvep/download/**" diff --git a/nextflow.config b/nextflow.config index cd044df..9a723fe 100644 --- a/nextflow.config +++ b/nextflow.config @@ -26,6 +26,8 @@ params { vep_cache = null vep_cache_version = null vep_genome = null + download_cache = false + outdir_cache = null exomiser_genome = null exomiser_data_dir = null exomiser_data_version = null @@ -44,15 +46,17 @@ params { exclude_mnps = true TSfilterSNP = '99' TSfilterINDEL = '99' - hardFilters = [[name: 'QD2', expression: 'QD < 2.0'], - [name: 'QD1', expression: 'QD < 1.0'], - [name: 'QUAL30', expression: 'QUAL < 30.0'], - [name: 'SOR3', expression: 'SOR > 3.0'], - [name: 'FS60', expression: 'FS > 60.0'], - [name: 'MQ40', expression: 'MQ < 40.0'], - [name: 'MQRankSum-12.5', expression: 'MQRankSum < -12.5'], - [name: 'ReadPosRankSum-8', expression: 'ReadPosRankSum < -8.0']] - + hardFilters = [ + [name: 'QD2', expression: 'QD < 2.0'], + [name: 'QD1', expression: 'QD < 1.0'], + [name: 'QUAL30', expression: 'QUAL < 30.0'], + [name: 
'SOR3', expression: 'SOR > 3.0'], + [name: 'FS60', expression: 'FS > 60.0'], + [name: 'MQ40', expression: 'MQ < 40.0'], + [name: 'MQRankSum-12.5', expression: 'MQRankSum < -12.5'], + [name: 'ReadPosRankSum-8', expression: 'ReadPosRankSum < -8.0'] + ] + allow_old_gatk_data = false @@ -270,7 +274,7 @@ process { disk = { check_max( 30.GB * task.attempt, 'disk' ) } time = { check_max( 10.h * task.attempt, 'time' ) } } - withName: 'GATK4_VARIANTFILTRATION' { + withName: 'GATK4_VARIANTFILTRATION' { errorStrategy = 'retry' maxRetries = 2 cpus = { check_max( 2 * task.attempt, 'cpus' ) } @@ -286,6 +290,14 @@ process { disk = { check_max( 30.GB * task.attempt, 'disk' ) } time = { check_max( 10.h * task.attempt, 'time' ) } } + withName: 'ENSEMBLVEP_DOWNLOAD' { + errorStrategy = 'retry' + maxRetries = 2 + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + disk = { check_max( 140.GB * task.attempt, 'disk' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } withName: 'ENSEMBLVEP_VEP' { errorStrategy = 'retry' maxRetries = 2 @@ -294,7 +306,7 @@ process { disk = { check_max( 80.GB * task.attempt, 'disk' ) } time = { check_max( 10.h * task.attempt, 'time' ) } } - withName: 'EXOMISER' { + withName: 'EXOMISER' { errorStrategy = 'retry' maxRetries = 2 cpus = { check_max( 6 * task.attempt, 'cpus' ) } @@ -302,7 +314,7 @@ process { disk = { check_max( 150.GB * task.attempt, 'disk' ) } time = { check_max( 10.h * task.attempt, 'time' ) } } - withName: 'writemeta' { + withName: 'writemeta' { container = 'ubuntu:24.10' } } diff --git a/nextflow_schema.json b/nextflow_schema.json index 3da8249..2f260ad 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -88,10 +88,7 @@ "format": "file-path" } }, - "required": [ - "referenceGenome", - "referenceGenomeFasta" - ], + "required": ["referenceGenome", "referenceGenomeFasta"], "if": { "required": ["dbsnpFile"] }, @@ -227,7 +224,6 @@ "type": "boolean", "description": 
"Boolean whether to validate parameters against the schema at runtime", "default": true, - "hidden": true }, "validationShowHiddenParams": { @@ -272,6 +268,18 @@ }, "hardFilters": { "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "expression": { + "type": "string" + } + }, + "required": ["name", "expression"] + }, "description": "Parameters for Hard-Filtering", "help_text": "Parameters for Hard-Filtering. Must be an array containing each desired filter. Each filter must be formatted with the desired name and expression, for example\\n[[name: 'QD1', expression: 'QD < 1.0'],[name: 'QD2', expression: 'QD < 2.0]]" }, @@ -289,7 +297,7 @@ "type": "boolean", "description": "If true, exclude MNPs from the VEP annotation", "help_text": "If true (default), exclude MNPs from the VEP annotation. Must be true on whole genome data.", - "default": true + "default": true } } }, @@ -314,6 +322,16 @@ "type": "string", "description": "Will be used by vep to find the assembly version in the cache", "help text": "Ex: GRCh38" + }, + "download_cache": { + "type": "boolean", + "description": "Whether to download the vep cache or not" + }, + "outdir_cache": { + "type": "string", + "description": "Path where vep cache will be downloaded", + "default": "${outdir}/cache", + "format": "directory-path" } }, "if": { @@ -404,8 +422,7 @@ }, "exomiser_start_from_vep": { "type": "boolean", - "description": "If true, run the exomiser analysis on the VEP annotated VCF file", - "default": false + "description": "If true, run the exomiser analysis on the VEP annotated VCF file" } }, "allOf": [ diff --git a/workflows/postprocessing.nf b/workflows/postprocessing.nf index c9b5c06..15570c5 100644 --- a/workflows/postprocessing.nf +++ b/workflows/postprocessing.nf @@ -17,18 +17,18 @@ include { VCF_ANNOTATE_ENSEMBLVEP } from '../subworkflows/nf-core/vcf_annotate_e include { COMBINEGVCFS } from '../modules/local/combine_gvcfs' include { 
GATK4_GENOTYPEGVCFS } from '../modules/nf-core/gatk4/genotypegvcfs' include { GATK4_VARIANTFILTRATION } from '../modules/nf-core/gatk4/variantfiltration' +include { ENSEMBLVEP_DOWNLOAD } from '../modules/nf-core/ensemblvep/download/main' //functions include { isExomiserToolIncluded } from '../subworkflows/local/utils_nfcore_postprocessing_pipeline/utils' include { isVepToolIncluded } from '../subworkflows/local/utils_nfcore_postprocessing_pipeline/utils' -def HOMO_SAPIENS_SPECIES = "homo_sapiens" - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + /** Tag variants that are probable artifacts In the case of whole genome sequencing data, we use the vqsr procedure. @@ -48,7 +48,7 @@ def tagArtifacts(ch_artifact_input, hardFilters, pathFasta, pathFai, pathDict) { [[:], pathDict]) def ch_variantfiltration_output = ch_gatk4_variantfiltration_output.vcf.join(ch_gatk4_variantfiltration_output.tbi) - .map{ meta, vcf, tbi -> [meta, [vcf,tbi]]} + .map{ meta, vcf, tbi -> [meta, [vcf,tbi]]} return ch_vqsr_output.concat(ch_variantfiltration_output) } @@ -144,9 +144,9 @@ def handle_mnps(input_channel, do_exclude_mnps) { /* - Deal with variations in input file formats, extensions, and the presence or absence of index files. - input: [meta, vcf] - output: [meta, vcf, tbi] +Deal with variations in input file formats, extensions, and the presence or absence of index files. 
+input: [meta, vcf] +output: [meta, vcf, tbi] */ def standardize_input_vcf_files(input_channel) { def view_input = input_channel.map{meta, vcf -> @@ -158,6 +158,11 @@ def standardize_input_vcf_files(input_channel) { } workflow POSTPROCESSING { + + take: + ch_samplesheet + + main: //Local Temp Params def referenceGenome = file(params.referenceGenome) def pathReferenceGenomeFasta = file(params.referenceGenome + "/" + params.referenceGenomeFasta) @@ -168,13 +173,10 @@ workflow POSTPROCESSING { def dbsnpFileIndex = params.dbsnpFileIndex? file(params.dbsnpFileIndex) : [] def exomiserLocalFrequencyFile = params.exomiser_local_frequency_path? file(params.exomiser_local_frequency_path) : [] def exomiserLocalFrequencyIndexFile = params.exomiser_local_frequency_index_path? file(params.exomiser_local_frequency_index_path) : [] - - file(params.outdir).mkdirs() - take: - ch_samplesheet - - main: + def HOMO_SAPIENS_SPECIES = "homo_sapiens" + + file(params.outdir).mkdirs() ch_versions = Channel.empty() @@ -227,7 +229,16 @@ workflow POSTPROCESSING { def ch_output_from_vep //declaring vep output channel early so that it can be accessed outside the if block //Annotating variants with VEP if (isVepToolIncluded()) { - def vep_cache = file(params.vep_cache) + + // Download VEP cache if download = true. Assuming we want to download even if cache provided. + if (params.download_cache) { + ensemblvep_info = Channel.of([ [ id:"${params.vep_cache_version}_${params.vep_genome}" ], params.vep_genome, HOMO_SAPIENS_SPECIES, params.vep_cache_version ]) + ENSEMBLVEP_DOWNLOAD(ensemblvep_info) + vep_cache = ENSEMBLVEP_DOWNLOAD.out.cache.collect().map{ _meta, cache -> [ cache ] } + ch_versions = ch_versions.mix(ENSEMBLVEP_DOWNLOAD.out.versions.first()) + } else { + vep_cache = file(params.vep_cache) + } ch_output_from_vep = vep( ch_output_from_splitMultiAllelics,