diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 58e6b6c9..53fd82c6 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -32,9 +32,6 @@ jobs: test $CURRENT_APPROVALS_COUNT -ge 2 || exit 1 # At least 2 approvals are required - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 - # TODO nf-core: You can customise AWS full pipeline tests as required - # Add full size test data (but still relatively small datasets for few samples) - # on the `test_full.config` test runs with only one set of parameters with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1155cd32..36ddf598 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,6 +10,10 @@ on: workflow_dispatch: env: + NFT_DIFF: "pdiff" + NFT_DIFF_ARGS: "--line-numbers --width 120 --expand-tabs=2" + NFT_VER: "0.9.0" + NFT_WORKDIR: "~" NXF_ANSI_LOG: false NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity @@ -20,24 +24,22 @@ concurrency: jobs: test: - name: "Run pipeline with test data (${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }})" + name: "Test ${{ matrix.filter }} | ${{ matrix.profile }} | ${{ matrix.NXF_VER }} | ${{ matrix.shard }}/5" # Only run on push if this is the nf-core dev branch (merged PRs) if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/rnavar') }}" runs-on: ubuntu-latest strategy: + fail-fast: false matrix: NXF_VER: - "24.04.2" - "latest-everything" - profile: - - "conda" - - "docker" - - "singularity" - test_name: - - "test" + filter: ["pipeline"] + # filter: ["process", "workflow", "function", "pipeline"] + profile: ["conda", "docker", "singularity"] + shard: [1, 2, 3, 4, 5] isMaster: - ${{ github.base_ref == 
'master' }} - # Exclude conda and singularity on dev exclude: - isMaster: false profile: "conda" @@ -46,12 +48,19 @@ jobs: steps: - name: Check out pipeline code uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + with: + fetch-depth: 0 - name: Set up Nextflow uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" + - name: Set up nf-test + uses: nf-core/setup-nf-test@v1 + with: + version: ${{ env.NFT_VER }} + - name: Set up Apptainer if: matrix.profile == 'singularity' uses: eWaterCycle/setup-apptainer@main @@ -62,6 +71,18 @@ jobs: mkdir -p $NXF_SINGULARITY_CACHEDIR mkdir -p $NXF_SINGULARITY_LIBRARYDIR + - name: Cache pdiff + uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4 + id: cache-pip-pdiff + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-pdiff + + - name: Set up pdiff to see diff between nf-test snapshots + run: | + python -m pip install --upgrade pip + pip install pdiff cryptography + - name: Set up Miniconda if: matrix.profile == 'conda' uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3 @@ -77,9 +98,29 @@ jobs: echo $(realpath $CONDA)/condabin >> $GITHUB_PATH echo $(realpath python) >> $GITHUB_PATH - - name: Clean up Disk space + - name: Disk space cleanup uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" + - name: "Run tests | ${{ matrix.filter }}_${{ matrix.profile }} | ${{ matrix.shard }}/5" + run: | + nf-test test \ + --ci \ + --debug \ + --verbose \ + --junitxml="TEST-${{ matrix.filter }}_${{ matrix.profile }}_${{ matrix.shard }}.xml" \ + --shard ${{ matrix.shard }}/5 \ + --changed-since HEAD^ \ + --follow-dependencies \ + --profile "+${{ matrix.profile }}" \ + --filter ${{ matrix.filter }} + + - name: Publish Test Report + uses: mikepenz/action-junit-report@v4 + if: success() || failure() # always run even if the 
previous step fails + with: + report_paths: "TEST-*.xml" + + - name: Clean up + if: always() run: | - nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} --outdir ./results + sudo rm -rf /home/ubuntu/tests/ diff --git a/.github/workflows/template_version_comment.yml b/.github/workflows/template_version_comment.yml index e8aafe44..d67ee08c 100644 --- a/.github/workflows/template_version_comment.yml +++ b/.github/workflows/template_version_comment.yml @@ -23,7 +23,6 @@ jobs: run: | python -m pip install --upgrade pip pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} - - name: Check nf-core outdated id: nf_core_outdated run: echo "OUTPUT=$(pip list --outdated | grep nf-core)" >> ${GITHUB_ENV} diff --git a/.gitignore b/.gitignore index a42ce016..23b0c7de 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ testing/ testing* *.pyc null/ +.nf-test* diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f35df8c..4f10d8ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,14 +3,73 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.0.0dev - [date] +## dev -Initial release of nf-core/rnavar, created with the [nf-core](https://nf-co.re/) template. 
+### Added -### `Added` +- [#116](https://github.com/nf-core/rnavar/pull/116) - Added `unzip` from nf-core modules for working with unzipped fasta and gtf files + +### Changed + +- [#78](https://github.com/nf-core/rnavar/pull/78) - Add a gvcf file generated by sample +- [#78](https://github.com/nf-core/rnavar/pull/78) - Merge on exome.bed +- [#78](https://github.com/nf-core/rnavar/pull/78) - Add option to choose the type exon/transcript/gene of studied feature in GTF +- [#95](https://github.com/nf-core/rnavar/pull/95) - Template update for nf-core/tools v2.9 +- [#97](https://github.com/nf-core/rnavar/pull/97) - Template update for nf-core/tools v2.10 +- [#109](https://github.com/nf-core/rnavar/pull/109) - Update all modules +- [#111](https://github.com/nf-core/rnavar/pull/111) - Template update for nf-core/tools v2.11 +- [#117](https://github.com/nf-core/rnavar/pull/117) - Template update for nf-core/tools v2.11.1 +- [#120](https://github.com/nf-core/rnavar/pull/120) - Template update for nf-core/tools v2.12 +- [#140](https://github.com/nf-core/rnavar/pull/140) - Template update for nf-core/tools v2.14.1 +- [#152](https://github.com/nf-core/rnavar/pull/152) - Template update for nf-core/tools v3.0.1 + +### Fixed + +- [#97](https://github.com/nf-core/rnavar/pull/97) - Update all gatk4 modules to disable JVM hotspot +- [#98](https://github.com/nf-core/rnavar/pull/98) - Restore annotation +- [#102](https://github.com/nf-core/rnavar/pull/102) - Fix process name SNPEFF_SNPEFF +- [#105](https://github.com/nf-core/rnavar/pull/105) - Fixing ch_dict +- [#106](https://github.com/nf-core/rnavar/pull/106) - Fixing vep_cache +- [#107](https://github.com/nf-core/rnavar/pull/107) - Fixing star index + single read files +- [#124](https://github.com/nf-core/rnavar/pull/124) - Fixed s3 bucket path in conditional statement for SnpEff cache +- [#127](https://github.com/nf-core/rnavar/pull/127) - Fixed s3 bucket path in conditional statement for VEP cache +- 
[#130](https://github.com/nf-core/rnavar/pull/130) - Added missing "def" in local variables +- [#132](https://github.com/nf-core/rnavar/pull/132) - Added missing variantcaller key to meta map, to fix null value in publishDir +- [#136](https://github.com/nf-core/rnavar/pull/136) - Replaced unzip module with gunzip, removed unzip module +- [#138](https://github.com/nf-core/rnavar/pull/138) - Proper usage of GVCF +- [#142](https://github.com/nf-core/rnavar/pull/142) - Fix dbsnp channels +- [#143](https://github.com/nf-core/rnavar/pull/143) - Use `DROP_MISSING_CONTIGS` by default in `GATK4_BEDTOINTERVALLIST` +- [#144](https://github.com/nf-core/rnavar/pull/144) - Change gatk_vf params from integer to floats +- [#149](https://github.com/nf-core/rnavar/pull/149) - Updated ch_gtf and ch_fasta_fai channels emitted by main.nf + +### Dependencies -### `Fixed` +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| bcftools | 1.17 | 1.18 | +| bedtools | 2.31.0 | 2.31.1 | +| fastqc | 0.11.9 | 0.12.1 | +| mosdepth | 0.3.3 | 0.3.6 | +| multiqc | 1.15 | 1.18 | +| samtools | 1.17 | 1.18 | + +## [1.0.0] nfcore/rnavar - 2022/06/20 + +First production release of the pipeline with latest software versions. + +This version is based on GATK4 best-practices for RNAseq [Ref](https://github.com/gatk-workflows/gatk4-rnaseq-germline-snps-indels) and it includes: + +### `Added` -### `Dependencies` +- Added `FastQC v0.11.9` from nf-core modules for read-level QC and summary. +- Added `STAR v2.7.9a` from nf-core modules for read alignment to reference genome. +- Added `Samtools v1.15.1` from nf-core modules for alignment statistics and QC. +- Added `GATK v4.2.6.1` from nf-core modules for alignment post-processing, variant calling and filtration. +- Added `Tabix v1.11` from nf-core modules for indexing BAM and VCF files. +- Added `SnpEff v5.0` from nf-core modules for variant annotation. 
+- Added `Ensembl VEP v104.3` from nf-core modules for variant annotation. +- Added `MultiQC v1.12` from nf-core modules for QC summary report. +- Added Scatter i.e., one interval-list into many interval-files to run multiple processes in parallel. -### `Deprecated` +Thanks to everyone that contributed to this release. +Special thanks to @maxulysse and @FriederikeHanssen for your review and valuable suggestions. diff --git a/CITATIONS.md b/CITATIONS.md index bda4c349..10a8f371 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -14,10 +14,38 @@ > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. +- [STAR](https://pubmed.ncbi.nlm.nih.gov/23104886/) + + > Dobin A, Davis CA, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M, Gingeras TR. STAR: ultrafast universal RNA-seq aligner Bioinformatics. 2013 Jan 1;29(1):15-21. doi: 10.1093/bioinformatics/bts635. Epub 2012 Oct 25. PubMed PMID: 23104886; PubMed Central PMCID: PMC3530905. + +- [SAMtools](https://pubmed.ncbi.nlm.nih.gov/19505943/) + + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + +- [GATK](https://pubmed.ncbi.nlm.nih.gov/20644199/) + + > McKenna A, Hanna M, Banks E, et al.: The Genome Analysis Toolkit: a MapReduce framework for analyzing next-generation DNA sequencing data. Genome Res. 2010 Sep;20(9):1297-303. doi: 10.1101/gr.107524.110. Epub 2010 Jul 19. PubMed PMID: 20644199; PubMed Central PMCID: PMC2928508. 
+ +- [snpEff](https://pubmed.ncbi.nlm.nih.gov/22728672/) + + > Cingolani P, Platts A, Wang le L, et al.: A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3. Fly (Austin). Apr-Jun 2012;6(2):80-92. doi: 10.4161/fly.19695. PubMed PMID: 22728672; PubMed Central PMCID: PMC3679285. + +- [VEP](https://pubmed.ncbi.nlm.nih.gov/27268795/) + + > McLaren W, Gil L, Hunt SE, et al.: The Ensembl Variant Effect Predictor. Genome Biol. 2016 Jun 6;17(1):122. doi: 10.1186/s13059-016-0974-4. PubMed PMID: 27268795; PubMed Central PMCID: PMC4893825. + - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. +- [Tabix](https://pubmed.ncbi.nlm.nih.gov/21208982/) + + > Heng Li, Tabix: fast retrieval of sequence features from generic TAB-delimited files, Bioinformatics, Volume 27, Issue 5, 1 March 2011, Pages 718–719. doi: 10.1093/bioinformatics/btq671. PubMed PMID: 21208982; PubMed Central PMCID: PMC3042176. + +- [R](https://www.R-project.org/) + + > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. 
+ ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/README.md b/README.md index 06ae4283..9c7f3ce4 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,9 @@ [![GitHub Actions CI Status](https://github.com/nf-core/rnavar/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/rnavar/actions/workflows/ci.yml) -[![GitHub Actions Linting Status](https://github.com/nf-core/rnavar/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/rnavar/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/rnavar/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![GitHub Actions Linting Status](https://github.com/nf-core/rnavar/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/rnavar/actions/workflows/linting.yml) +[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/rnavar/results) +[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.6669636-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.6669636) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/) @@ -15,33 +17,51 @@ [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/rnavar) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23rnavar-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/rnavar)[![Follow on 
Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23rnavar-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/rnavar) +[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core) +[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core) +[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction -**nf-core/rnavar** is a bioinformatics pipeline that ... - - - - - - -1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +**nf-core/rnavar** is a bioinformatics pipeline for RNA variant calling analysis following GATK4 best practices. + +## Pipeline summary + +1. Merge re-sequenced FastQ files ([`cat`](http://www.linfo.org/cat.html)) +2. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +3. Align reads to reference genome ([`STAR`](https://github.com/alexdobin/STAR)) +4. Sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) +5. Duplicate read marking ([`GATK4 MarkDuplicates`](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard)) +6. 
Splits reads that contain Ns in their cigar string ([`GATK4 SplitNCigarReads`](https://gatk.broadinstitute.org/hc/en-us/articles/4409917482651-SplitNCigarReads)) +7. Estimate and correct systematic bias using base quality score recalibration ([`GATK4 BaseRecalibrator`](https://gatk.broadinstitute.org/hc/en-us/articles/4409897206043-BaseRecalibrator), [`GATK4 ApplyBQSR`](https://gatk.broadinstitute.org/hc/en-us/articles/4409897168667-ApplyBQSR)) +8. Convert a BED file to a Picard Interval List ([`GATK4 BedToIntervalList`](https://gatk.broadinstitute.org/hc/en-us/articles/4409924780827-BedToIntervalList-Picard-)) +9. Scatter one interval-list into many interval-files ([`GATK4 IntervalListTools`](https://gatk.broadinstitute.org/hc/en-us/articles/4409917392155-IntervalListTools-Picard-)) +10. Call SNPs and indels ([`GATK4 HaplotypeCaller`](https://gatk.broadinstitute.org/hc/en-us/articles/4409897180827-HaplotypeCaller)) +11. Merge multiple VCF files into one VCF ([`GATK4 MergeVCFs`](https://gatk.broadinstitute.org/hc/en-us/articles/4409924817691-MergeVcfs-Picard-)) +12. Index the VCF ([`Tabix`](http://www.htslib.org/doc/tabix.html)) +13. Filter variant calls based on certain criteria ([`GATK4 VariantFiltration`](https://gatk.broadinstitute.org/hc/en-us/articles/4409897204763-VariantFiltration)) +14. Annotate variants ([`snpEff`](https://pcingola.github.io/SnpEff/se_introduction/), [Ensembl VEP](https://www.ensembl.org/info/docs/tools/vep/index.html)) +15. 
Present QC for raw read, alignment, gene biotype, sample similarity, and strand-specificity checks ([`MultiQC`](http://multiqc.info/), [`R`](https://www.r-project.org/)) + +### Summary of tools and versions used in the pipeline + +| Tool | Version | +| ----------- | ------- | +| FastQC | 0.11.9 | +| STAR | 2.7.9a | +| Samtools | 1.15.1 | +| GATK | 4.2.6.1 | +| Tabix | 1.11 | +| SnpEff | 5.0 | +| Ensembl VEP | 104.3 | +| MultiQC | 1.12 | ## Usage > [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - - Now, you can run the pipeline using: - - -```bash -nextflow run nf-core/rnavar \ - -profile \ - --input samplesheet.csv \ - --outdir +```console +nextflow run nf-core/rnavar -profile <docker/singularity/.../institute> --input samplesheet.csv --outdir <OUTDIR> --genome GRCh38 ``` > [!WARNING] @@ -79,11 +92,25 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/rnavar was originally written by Praveen Raj, Maxime U Garcia. +nf-core/rnavar was originally written in Nextflow DSL2 for use at the [Barntumörbanken, Karolinska Institutet](https://ki.se/forskning/barntumorbanken), by Praveen Raj ([@praveenraj2018](https://github.com/praveenraj2018)) and Maxime U Garcia ([@maxulysse](https://github.com/maxulysse)). + +nf-core/rnavar was originally written by Praveen Raj at [The Swedish Childhood Tumor Biobank (Barntumörbanken)](https://ki.se/forskning/barntumorbanken). +Maxime U Garcia at [The Swedish Childhood Tumor Biobank (Barntumörbanken)](https://ki.se/forskning/barntumorbanken) helped with development. 
+ +Maintenance is now led by Maxime U Garcia (now at [Seqera Labs](https://seqera.io)) + +Main developers: + +- [Maxime U Garcia](https://github.com/maxulysse) We thank the following people for their extensive assistance in the development of this pipeline: - +- [Harshil Patel](https://github.com/drpatelh) +- [Nicolás Schcolnicov](https://github.com/nschcolnicov) +- [Ömer An](https://github.com/bounlu) +- [Phil Ewels](https://github.com/ewels) +- [Praveen Raj](https://github.com/praveenraj2018) +- [Sarah Maman](https://github.com/SarahMaman) ## Contributions and Support @@ -93,10 +120,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations - - - - +If you use nf-core/rnavar for your analysis, please cite it using the following doi: [10.5281/zenodo.6669636](https://doi.org/10.5281/zenodo.6669636) An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 46025e30..01825cfb 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,8 +3,6 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/rnavar Methods Description" section_href: "https://github.com/nf-core/rnavar" plot_type: "html" -## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline -## You inject any metadata in the Nextflow '${workflow}' object data: |

Methods

Data was processed using nf-core/rnavar v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index e838f017..d858664f 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -13,3 +13,54 @@ report_section_order: export_plots: true disable_version_detection: true +# Run only these modules +run_modules: + - custom_content + - fastqc + - star + - samtools + - picard + - gatk + - snpeff + - vep + +# Order of modules +module_order: + - fastqc: + name: "FastQC (raw)" + path_filters: + - "*_val_*.zip" + - star: + name: "Read Alignment (STAR)" + - samtools: + name: "Samtools Flagstat" + - picard: + name: "GATK4 MarkDuplicates" + info: "Metrics generated either by GATK4 MarkDuplicates" + - qualimap: + name: "Qualimap" + - gatk: + name: "GATK4 BQSR" + - snpeff: + name: "SNPeff" + - vep: + name: "VEP" + +extra_fn_clean_exts: + - "_val" + +# Don't show % Dups in the General Stats table (we have this from Picard) +table_columns_visible: + fastqc: + percent_duplicates: False + +sp: + samtools/stats: + fn: "*.aligned.bam.stats" + samtools/flagstat: + fn: "*.aligned.bam.flagstat" + picard/markdups: + fn: "*.markdup.sorted.metrics" + snpeff: + contents: "SnpEff_version" + max_filesize: 5000000 diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 5f653ab7..6b6e0ec5 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,4 @@ -sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, +sample,fastq_1,fastq_2,strandedness +RAP1_UNINDUCED_REP1,s3://nf-core-awsmegatests/rnaseq/input_data/minimal/GSE110004/SRR6357073_1.fastq.gz,,reverse +RAP1_UNINDUCED_REP2,s3://nf-core-awsmegatests/rnaseq/input_data/minimal/GSE110004/SRR6357074_1.fastq.gz,,reverse +RAP1_UNINDUCED_REP2,s3://nf-core-awsmegatests/rnaseq/input_data/minimal/GSE110004/SRR6357075_1.fastq.gz,,reverse diff --git a/assets/schema_input.json 
b/assets/schema_input.json index bcb2e914..b21e53d8 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -14,11 +14,11 @@ "meta": ["id"] }, "fastq_1": { + "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", "type": "string", "format": "file-path", "exists": true, - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "pattern": "^\\S+\\.f(ast)?q\\.gz$" }, "fastq_2": { "type": "string", @@ -26,8 +26,14 @@ "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "strandedness": { + "type": "string", + "errorMessage": "Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded'", + "enum": ["forward", "reverse", "unstranded"], + "meta": ["strandedness"] } }, - "required": ["sample", "fastq_1"] + "required": ["sample", "fastq_1", "strandedness"] } } diff --git a/conf/base.config b/conf/base.config index 01895aef..cafd3d2e 100644 --- a/conf/base.config +++ b/conf/base.config @@ -24,7 +24,6 @@ process { // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. // If possible, it would be nice to keep the same label naming convention when // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. 
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { 1 } diff --git a/conf/igenomes.config b/conf/igenomes.config index 3f114377..48ed5e9a 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -12,429 +12,308 @@ params { // illumina iGenomes reference file paths genomes { 'GRCh37' { - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" + fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" + snpeff_db = '87' + snpeff_genome = 'GRCh37' + vep_cache_version = '111' + vep_genome = 'GRCh37' + vep_species = 'homo_sapiens' } 'GRCh38' { - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = 
"${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" + snpeff_db = '105' + snpeff_genome = 'GRCh38' + vep_cache_version = '110' + vep_genome = 'GRCh38' + vep_species = 'homo_sapiens' } 'CHM13' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" - bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" - gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" - mito_name = "chrM" + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" } 'GRCm38' { - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" + snpeff_db = '99' + snpeff_genome = 'GRCm38' + vep_cache_version = '102' + vep_genome = 'GRCm38' + vep_species = 'mus_musculus' } 'TAIR10' { - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" - mito_name = "Mt" + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" } 'EB2' { - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" } 'UMD3.1' { - fasta = 
"${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" - mito_name = "MT" + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" + snpeff_db = '75' + snpeff_genome = 'UMD3.1' + vep_cache_version = '94' + vep_genome = 'UMD3.1' + vep_species = 'bos_taurus' } 'WBcel235' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - mito_name = "MtDNA" - macs_gsize = "9e7" + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" + snpeff_db = '105' + snpeff_genome = 'WBcel235' + vep_cache_version = '111' + vep_genome = 'WBcel235' + vep_species = 'caenorhabditis_elegans' } 'CanFam3.1' { - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" - mito_name = "MT" + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" + 
snpeff_db = '99' + snpeff_genome = 'CanFam3.1' + vep_cache_version = '104' + vep_genome = 'CanFam3.1' + vep_species = 'canis_lupus_familiaris' } 'GRCz10' { - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" - mito_name = "MT" + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" } 'BDGP6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - mito_name = "M" - macs_gsize = "1.2e8" + fasta = 
"${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" } 'EquCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" - mito_name = "MT" + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" } 'EB1' { - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" - star = 
"${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" } 'Galgal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" - mito_name = "MT" + fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" } 'Gm01' { - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" + fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" } 'Mmul_1' { - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" - mito_name = "MT" + 
fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" } 'IRGSP-1.0' { - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" - mito_name = "Mt" + fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" } 'CHIMP2.1.4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" - star = 
"${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" - mito_name = "MT" + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" } 'Rnor_5.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" - mito_name = "MT" + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" } 'Rnor_6.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" - mito_name = "MT" + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" } 'R64-1-1' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" - mito_name = "MT" - macs_gsize = "1.2e7" + fasta = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" + snpeff_db = '105' + snpeff_genome = 'R64-1-1' + vep_cache_version = '111' + vep_genome = 'R64-1-1' + vep_species = 'saccharomyces_cerevisiae' } 'EF2' { - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.21e7" + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" } 'Sbi1' { - fasta = 
"${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" } 'Sscrofa10.2' { - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" - mito_name = "MT" + fasta = 
"${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" } 'AGPv3' { - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" - mito_name = "Mt" + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" } 'hg38' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" + snpeff_db = '105' + snpeff_genome = 'GRCh38' + vep_cache_version = '111' + vep_genome = 'GRCh38' + vep_species = 'homo_sapiens' } 'hg19' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" + snpeff_db = '87' + snpeff_genome = 'GRCh37' + vep_cache_version = '111' + vep_genome = 'GRCh37' + 
vep_species = 'homo_sapiens' } 'mm10' { - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" + snpeff_db = '99' + snpeff_genome = 'GRCm38' + vep_cache_version = '102' + vep_genome = 'GRCm38' + vep_species = 'mus_musculus' } 'bosTau8' { - fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" - mito_name = "chrM" + fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" } 'ce10' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "9e7" + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" } 'canFam3' { - fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = 
"${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" - mito_name = "chrM" + fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" } 'danRer10' { - fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.37e9" + fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" } 'dm6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.2e8" + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" } 'equCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" - mito_name = "chrM" + fasta = 
"${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" } 'galGal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" - mito_name = "chrM" + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" } 'panTro4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" - star = 
"${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" - mito_name = "chrM" + fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" } 'rn6' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" - mito_name = "chrM" + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" } 'sacCer3' { - fasta = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" - readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.2e7" + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" + readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" } 'susScr3' { - fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" - mito_name = "chrM" + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" + star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" } } } diff --git a/conf/modules.config b/conf/modules.config old mode 100644 new mode 100755 index d266a387..87651eb8 --- a/conf/modules.config +++ b/conf/modules.config @@ -3,10 +3,10 @@ Config file for defining DSL2 per module options and publishing paths ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Available keys to override module options: - ext.args = Additional arguments appended to command in module. - ext.args2 = Second set of arguments appended to command in module (multi-tool modules). - ext.args3 = Third set of arguments appended to command in module (multi-tool modules). - ext.prefix = File name prefix for output files. + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. ---------------------------------------------------------------------------------------- */ @@ -18,16 +18,322 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: FASTQC { - ext.args = '--quiet' +} + +// PREPARE GENOME + +process { + + withName: CAT_FASTQ { + publishDir = [ + path: { "${params.outdir}/fastq" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_merged_fastq + ] + } + + withName: STAR_GENOMEGENERATE { + ext.args = params.read_length ? "--sjdbOverhang ${params.read_length - 1}" : '' + } + + withName: 'STAR_GENOMEGENERATE' { + publishDir = [ + path: { "${params.outdir}/genome/index" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference + ] + } + + withName: GFFREAD { + ext.args = '--keep-exon-attrs -F -T' + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ?null : filename }, + enabled: params.save_reference + ] + } + + withName: GTF2BED { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + + withName: SAMTOOLS_FAIDX { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: GATK4_CREATESEQUENCEDICTIONARY { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] } - withName: 'MULTIQC' { +} + +// READ QC AND QC REPORT + +process { + + withName: MULTIQC { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ - path: { "${params.outdir}/multiqc" }, + path: { "${params.outdir}/reports"}, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: !params.skip_multiqc ] } + withName: FASTQC { + ext.args = '--quiet' + publishDir = [ + enabled: false + ] + } +} + +// ALIGNMENT +process { + + // ALIGN_STAR + + withName: STAR_ALIGN { + ext.args = { [ + '--outSAMtype BAM SortedByCoordinate', + '--readFilesCommand zcat', + params.save_unaligned ? '--outReadsUnmapped Fastx' : '', + params.read_length ? "--sjdbOverhang ${params.read_length - 1}" : '', + params.star_twopass ? 
'--twopassMode Basic' : '', + params.star_max_memory_bamsort > 0 ? "--limitBAMsortRAM ${params.star_max_memory_bamsort}" : "", + params.star_bins_bamsort > 0 ? "--outBAMsortingBinsN ${params.star_bins_bamsort}" : "", + params.star_max_collapsed_junc > 0 ? "--limitOutSJcollapsed ${params.star_max_collapsed_junc}" : "" + ].flatten().unique(false).join(' ').trim() } + publishDir = [ + [ + path: { "${params.outdir}/reports/stats/${meta.id}/STAR_log" }, + mode: params.publish_dir_mode, + pattern: '*.{out,tab}' + ], + [ + path: { "${params.outdir}/preprocessing/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: false + ], + [ + path: { "${params.outdir}/preprocessing/${meta.id}/unmapped" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_unaligned + ] + ] + } +} + +// POST ALIGNMENT AND PREPROCESSING BAM + +process { + + withName: '.*:ALIGN_STAR:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = {"${meta.id}.aligned"} + publishDir = [ + path: { "${params.outdir}/preprocessing/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.bam", + enabled: params.save_align_intermeds + ] + } + + withName: '.*:ALIGN_STAR:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { + ext.args = params.bam_csi_index ? '-c' : '' + ext.prefix = {"${meta.id}.aligned"} + publishDir = [ + path: { "${params.outdir}/preprocessing/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.{bai,csi}", + enabled: params.save_align_intermeds + ] + } + + withName: GATK4_BEDTOINTERVALLIST { + publishDir = [ enabled: false ] + } + + withName: GATK4_INTERVALLISTTOOLS { + ext.args = [ + '--SUBDIVISION_MODE BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW', + '--UNIQUE true', + '--SORT true', + params.gatk_interval_scatter_count ? 
"--SCATTER_COUNT $params.gatk_interval_scatter_count" : '' + ].join(' ').trim() + publishDir = [ enabled: false ] + } + + withName: 'PICARD_MARKDUPLICATES' { + ext.args = [ + '--ASSUME_SORTED true', + '--VALIDATION_STRINGENCY LENIENT', + params.remove_duplicates ? '--REMOVE_DUPLICATES true' : '' + ].join(' ').trim() + ext.prefix = {"${meta.id}.md"} + publishDir = [ + [ + path: { "${params.outdir}/preprocessing/${meta.id}" }, + mode: params.publish_dir_mode, + enabled: true, + pattern: "*.{bam,bai,csi}" + ], + [ + path: { "${params.outdir}/reports/stats/${meta.id}" }, + mode: params.publish_dir_mode, + enabled: true, + pattern: "*.{metrics}" + ] + ] + } + + withName: '.*:BAM_MARKDUPLICATES_PICARD:SAMTOOLS_INDEX' { + ext.args = params.bam_csi_index ? '-c' : '' + ext.prefix = {"${meta.id}.md"} + publishDir = [ + path: { "${params.outdir}/preprocessing/${meta.id}" }, + mode: params.publish_dir_mode, + enabled: true, + pattern: "*.{bai,csi}" + ] + } + + withName: '.*:SPLITNCIGAR:GATK4_SPLITNCIGARREADS' { + ext.args = '--create-output-bam-index false' + } + + withName: '.*:SPLITNCIGAR:SAMTOOLS_INDEX' { + ext.args = params.bam_csi_index ? 
'-c' : '' + } + + withName: '.*:SPLITNCIGAR:.*' { + publishDir = [ enabled: false ] + } + + withName: 'SAMTOOLS_STATS|SAMTOOLS_FLAGSTAT|SAMTOOLS_IDXSTATS' { + publishDir = [ + path: { "${params.outdir}/reports/stats/${meta.id}" }, + mode: params.publish_dir_mode, + enabled: !params.skip_multiqc, + pattern: "*.{stats,flagstat}" + ] + } + +} + +// BASE RECALIBRATION PROCESS +if(!params.skip_baserecalibration) { + process { + + withName: GATK4_BASERECALIBRATOR { + ext.args = '--use-original-qualities' + publishDir = [ enabled: false ] + } + + withName: '.*:RECALIBRATE:APPLYBQSR' { + ext.args = [ + '--use-original-qualities', + '--add-output-sam-program-record' + ].join(' ').trim() + ext.prefix = {"${meta.id}.recal"} + publishDir = [ + path: { "${params.outdir}/preprocessing/${meta.id}" }, + mode: params.publish_dir_mode, + enabled: true, + pattern: "*.{bam}" + ] + } + + withName: '.*:RECALIBRATE:SAMTOOLS_INDEX' { + ext.args = params.bam_csi_index ? '-c' : '' + ext.prefix = {"${meta.id}.recal"} + publishDir = [ + path: { "${params.outdir}/preprocessing/${meta.id}" }, + mode: params.publish_dir_mode, + enabled: true, + pattern: "*.{bai,csi}" + ] + } + } +} + +// VARIANT CALLING AND FILTERING + +process { + + withName: GATK4_HAPLOTYPECALLER { + ext.args = [ + '--dont-use-soft-clipped-bases', + '--create-output-variant-index true', + params.gatk_hc_call_conf ? "--standard-min-confidence-threshold-for-calling $params.gatk_hc_call_conf" : '', + params.generate_gvcf ? 
"-ERC GVCF" : "" + ].join(' ').trim() + publishDir = [ enabled: false ] + } + + withName: GATK4_MERGEVCFS { + ext.prefix = {"${meta.id}.haplotypecaller"} + publishDir = [ + path: { "${params.outdir}/variant_calling/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.{vcf.gz}" + ] + } + withName: GATK4_COMBINEGVCFS { + ext.prefix = {"${meta.id}.haplotypecaller"} + publishDir = [ + path: { "${params.outdir}/variant_calling/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.{vcf.gz}" + ] + } + + withName: TABIX_TABIX { + ext.args = params.bam_csi_index ? '--csi' : '' + ext.prefix = {"${meta.id}.haplotypecaller"} + publishDir = [ + path: { "${params.outdir}/variant_calling/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.{vcf.gz.tbi,vcf.gz.csi}" + ] + } + + withName: GATK4_VARIANTFILTRATION { + ext.prefix = {"${meta.id}.haplotypecaller.filtered"} + ext.args = [ + params.gatk_vf_window_size ? "--window $params.gatk_vf_window_size" : '', + params.gatk_vf_cluster_size ? "--cluster $params.gatk_vf_cluster_size" : '', + params.gatk_vf_fs_filter ? "--filter-name \"FS\" --filter \"FS > $params.gatk_vf_fs_filter\" " : '', + params.gatk_vf_qd_filter ? 
"--filter-name \"QD\" --filter \"QD < $params.gatk_vf_qd_filter\" " : '', + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/variant_calling/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'NFCORE_RNAVAR:RNAVAR:GATK4_BEDTOINTERVALLIST' { + ext.args = '--DROP_MISSING_CONTIGS TRUE' + } } diff --git a/conf/modules/annotate.config b/conf/modules/annotate.config new file mode 100644 index 00000000..ea4f921e --- /dev/null +++ b/conf/modules/annotate.config @@ -0,0 +1,117 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// ANNOTATE + +process { + + // SNPEFF + if (params.annotate_tools && (params.annotate_tools.split(',').contains('snpeff') || params.annotate_tools.split(',').contains('merge'))) { + withName: 'SNPEFF_SNPEFF' { + ext.args = { '-nodownload -canon -v' } + ext.prefix = { vcf.baseName - '.vcf' + '_snpEff' } + publishDir = [ + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/snpeff/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{csv,html,genes.txt}", + saveAs: { params.annotate_tools.split(',').contains('snpeff') ? 
it : null } + ] + ] + } + } + + // VEP + if (params.annotate_tools && (params.annotate_tools.split(',').contains('vep') || params.annotate_tools.split(',').contains('merge'))) { + withName: 'ENSEMBLVEP_VEP' { + ext.args = { [ + "--stats_file ${vcf.baseName - '.vcf' + '_VEP.ann'}.summary.html", + (params.vep_dbnsfp && params.dbnsfp && !params.dbnsfp_consequence) ? "--plugin dbNSFP,${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', + (params.vep_dbnsfp && params.dbnsfp && params.dbnsfp_consequence) ? "--plugin dbNSFP,'consequence=${params.dbnsfp_consequence}',${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', + (params.vep_loftee) ? "--plugin LoF,loftee_path:/usr/local/share/ensembl-vep-${params.vep_version}" : '', + (params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? "--plugin SpliceAI,snv=${params.spliceai_snv.split("/")[-1]},indel=${params.spliceai_indel.split("/")[-1]}" : '', + (params.vep_spliceregion) ? '--plugin SpliceRegion' : '', + (params.vep_out_format) ? 
"--${params.vep_out_format}" : '--vcf', + (params.vep_custom_args) ?: '' + ].join(' ').trim() } + // If just VEP: _VEP.ann.vcf + ext.prefix = { vcf.baseName - '.vcf' + '_VEP.ann' } + publishDir = [ + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/EnsemblVEP/${meta.variantcaller}/${meta.id}/" }, + pattern: "*html" + ], + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{gz}" + ] + ] + } + } + + // BCFTOOLS ANNOTATE + if (params.annotate_tools && params.annotate_tools.split(',').contains('bcfann')) { + withName: 'NFCORE_RNAVAR:RNAVAR:VCF_ANNOTATE_ALL:VCF_ANNOTATE_BCFTOOLS:BCFTOOLS_ANNOTATE' { + ext.args = { '--output-type z' } + ext.prefix = { input.baseName - '.vcf' + '_BCF.ann' } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{gz}" + ] + } + } + + // SNPEFF THEN VEP + if (params.annotate_tools && params.annotate_tools.split(',').contains('merge')) { + withName: 'NFCORE_RNAVAR:RNAVAR:VCF_ANNOTATE_ALL:VCF_ANNOTATE_MERGE:ENSEMBLVEP_VEP' { + ext.args = { [ + "--stats_file ${vcf.baseName - '.ann.vcf' + '_VEP.ann'}.summary.html", + (params.vep_dbnsfp && params.dbnsfp && !params.dbnsfp_consequence) ? "--plugin dbNSFP,${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', + (params.vep_dbnsfp && params.dbnsfp && params.dbnsfp_consequence) ? "--plugin dbNSFP,'consequence=${params.dbnsfp_consequence}',${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', + (params.vep_loftee) ? "--plugin LoF,loftee_path:/usr/local/share/ensembl-vep-${params.vep_version}" : '', + (params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? "--plugin SpliceAI,snv=${params.spliceai_snv.split("/")[-1]},indel=${params.spliceai_indel.split("/")[-1]}" : '', + (params.vep_spliceregion) ? '--plugin SpliceRegion' : '', + (params.vep_out_format) ? 
"--${params.vep_out_format}" : '--vcf', + (params.vep_custom_args) ?: '' + ].join(' ').trim() } + // If merge: Output file will have format *_snpEff_VEP.ann.vcf, *_snpEff_VEP.ann.json or *_snpEff_VEP.ann.tab + ext.prefix = { vcf.baseName - '.ann.vcf' + '_VEP.ann' } + } + } + + // ALL ANNOTATION TOOLS + if (params.annotate_tools && (params.annotate_tools.split(',').contains('snpeff') || params.annotate_tools.split(',').contains('vep') || params.annotate_tools.split(',').contains('merge') || params.annotate_tools.split(',').contains('bcfann'))) { + withName: 'NFCORE_RNAVAR:RNAVAR:VCF_ANNOTATE_ALL:.*:(TABIX_BGZIPTABIX|TABIX_TABIX)' { + ext.prefix = { input.name - '.vcf' } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{gz.tbi}" + ] + } + } + + if (params.annotate_tools && (params.annotate_tools.split(',').contains('snpeff') || params.annotate_tools.split(',').contains('merge'))) { + withName: 'NFCORE_RNAVAR:RNAVAR:VCF_ANNOTATE_ALL:VCF_ANNOTATE_SNPEFF:TABIX_BGZIPTABIX' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{gz,gz.tbi}", + saveAs: { params.annotate_tools.split(',').contains('snpeff') ? it : null } + ] + } + } +} diff --git a/conf/modules/prepare_cache.config b/conf/modules/prepare_cache.config new file mode 100644 index 00000000..7329d0ab --- /dev/null +++ b/conf/modules/prepare_cache.config @@ -0,0 +1,36 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). 
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// PREPARE_CACHE + +process { + + // SNPEFF + withName: 'SNPEFF_DOWNLOAD' { + ext.when = { params.annotate_tools && (params.annotate_tools.split(',').contains('snpeff') || params.annotate_tools.split(',').contains('merge')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { params.outdir_cache ? "${params.outdir_cache}/": "${params.outdir}/cache/" } + ] + } + + // VEP + withName: 'ENSEMBLVEP_DOWNLOAD' { + ext.when = { params.annotate_tools && (params.annotate_tools.split(',').contains('vep') || params.annotate_tools.split(',').contains('merge')) } + ext.args = { '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE' } + publishDir = [ + mode: params.publish_dir_mode, + path: { params.outdir_cache ? 
"${params.outdir_cache}/": "${params.outdir}/cache/" } + ] + } +} diff --git a/conf/test.config b/conf/test.config index ca6300a9..7fc184d5 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,10 +23,28 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + input = "${projectDir}/tests/csv/1.0/fastq_single.csv" // Genome references - genome = 'R64-1-1' + genome = null + + fasta = "${params.pipelines_testdata_base_path}/genomics/homo_sapiens/genome/genome.fasta" + dict = "${params.pipelines_testdata_base_path}/genomics/homo_sapiens/genome/genome.dict" + gtf = "${params.pipelines_testdata_base_path}/genomics/homo_sapiens/genome/genome.gtf" + + // Known genome resources (optional) + dbsnp = "${params.pipelines_testdata_base_path}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" + dbsnp_tbi = "${params.pipelines_testdata_base_path}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi" + known_indels = "${params.pipelines_testdata_base_path}/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz" + known_indels_tbi = "${params.pipelines_testdata_base_path}/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz.tbi" + + // STAR index (optional) + star_index = "${params.pipelines_testdata_base_path}/genomics/homo_sapiens/genome/index/star/star.tar.gz" + + // Annotation + snpeff_db = '105' + snpeff_genome = 'WBcel235' + vep_cache_version = '110' + vep_genome = 'WBcel235' + vep_species = 'caenorhabditis_elegans' } diff --git a/conf/test_full.config b/conf/test_full.config index 82f01879..81133bdc 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -14,11 +14,8 @@ params { 
config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' - // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' - - // Genome references - genome = 'R64-1-1' + // Parameters for full-size test + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnavar/samplesheet/v1.0/samplesheet_full.csv' + read_length = 101 + genome = 'GRCh38' } diff --git a/docs/images/alignment_star_mapping_stats.png b/docs/images/alignment_star_mapping_stats.png new file mode 100644 index 00000000..d6a27911 Binary files /dev/null and b/docs/images/alignment_star_mapping_stats.png differ diff --git a/docs/images/fastqc_gc_content.png b/docs/images/fastqc_gc_content.png new file mode 100644 index 00000000..df3d5f7b Binary files /dev/null and b/docs/images/fastqc_gc_content.png differ diff --git a/docs/images/fastqc_length_dist.png b/docs/images/fastqc_length_dist.png new file mode 100644 index 00000000..8819cfeb Binary files /dev/null and b/docs/images/fastqc_length_dist.png differ diff --git a/docs/images/fastqc_mean_qual_score_dist.png b/docs/images/fastqc_mean_qual_score_dist.png new file mode 100644 index 00000000..e31cdc11 Binary files /dev/null and b/docs/images/fastqc_mean_qual_score_dist.png differ diff --git a/docs/images/fastqc_overall_status.png b/docs/images/fastqc_overall_status.png new file mode 100644 index 00000000..85f789c3 Binary files /dev/null and b/docs/images/fastqc_overall_status.png differ diff --git a/docs/images/fastqc_seq_counts.png b/docs/images/fastqc_seq_counts.png new file mode 100644 index 00000000..a4264a55 Binary files /dev/null and b/docs/images/fastqc_seq_counts.png differ 
diff --git a/docs/images/multiqc_report.png b/docs/images/multiqc_report.png new file mode 100644 index 00000000..6acc3d40 Binary files /dev/null and b/docs/images/multiqc_report.png differ diff --git a/docs/images/read_markduplicates_stats.png b/docs/images/read_markduplicates_stats.png new file mode 100644 index 00000000..cacde255 Binary files /dev/null and b/docs/images/read_markduplicates_stats.png differ diff --git a/docs/images/snpeff_variant_quality.png b/docs/images/snpeff_variant_quality.png new file mode 100644 index 00000000..7e117da2 Binary files /dev/null and b/docs/images/snpeff_variant_quality.png differ diff --git a/docs/images/snpeff_variants_by_effect_types.png b/docs/images/snpeff_variants_by_effect_types.png new file mode 100644 index 00000000..c6d71647 Binary files /dev/null and b/docs/images/snpeff_variants_by_effect_types.png differ diff --git a/docs/images/snpeff_variants_by_impact.png b/docs/images/snpeff_variants_by_impact.png new file mode 100644 index 00000000..830aba68 Binary files /dev/null and b/docs/images/snpeff_variants_by_impact.png differ diff --git a/docs/images/snpeff_variants_by_region.png b/docs/images/snpeff_variants_by_region.png new file mode 100644 index 00000000..e9f8deec Binary files /dev/null and b/docs/images/snpeff_variants_by_region.png differ diff --git a/docs/images/vep_general_stats.png b/docs/images/vep_general_stats.png new file mode 100644 index 00000000..cb779627 Binary files /dev/null and b/docs/images/vep_general_stats.png differ diff --git a/docs/images/vep_pph_summary.png b/docs/images/vep_pph_summary.png new file mode 100644 index 00000000..e273fe11 Binary files /dev/null and b/docs/images/vep_pph_summary.png differ diff --git a/docs/images/vep_sift_summary.png b/docs/images/vep_sift_summary.png new file mode 100644 index 00000000..ed661d14 Binary files /dev/null and b/docs/images/vep_sift_summary.png differ diff --git a/docs/output.md b/docs/output.md index 40fd2f1f..a133cff4 100644 --- a/docs/output.md 
+++ b/docs/output.md @@ -2,50 +2,390 @@ ## Introduction -This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. +This document describes the output produced by the pipeline based on a public dataset. -The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. - +The directories listed below are created in the `results/` directory after the pipeline has finished. All paths are relative to the top-level results directory. - +### Dataset + +RNAseq data was taken from the GM12878 GIAB sample, SRA accession [SRR5665260](https://www.ncbi.nlm.nih.gov/sra/?term=SRR5665260). The dataset has 38.6 million paired-end (PE) reads of read length 2x151 bp from the NextSeq 500 sequencing platform. + +### Samplesheet + +A samplesheet was prepared as follows to specify the FASTQ files for the analysis. + +```console +sample,fastq_1,fastq_2,strandedness +GM12878,/data/GM12878/SRR5665260_1.fastq.gz,/data/GM12878/SRR5665260_2.fastq.gz,reverse +``` + +### Execution + +The pipeline was executed with the following command: + +```console +nextflow run nf-core/rnavar -profile <institutional_config>,docker --input samplesheet.csv --genome GRCh38 --annotate_tools merge --outdir results +``` + +The `<institutional_config>` used in this experiment can be found [here](https://github.com/nf-core/configs/blob/master/conf/pipeline/rnavar/munin.config). However, you can create your own institutional config, place it on [nf-core/configs](https://github.com/nf-core/configs/tree/master/conf/pipeline/rnavar), and then use its name directly in the command instead of `<institutional_config>` to use your own data and parameters. 
## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -- [FastQC](#fastqc) - Raw read QC -- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline -- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +- [nf-core/rnavar: Output](#nf-corernavar-output) + - [Introduction](#introduction) + - [Dataset](#dataset) + - [Samplesheet](#samplesheet) + - [Execution](#execution) + - [Pipeline overview](#pipeline-overview) + - [Preprocessing](#preprocessing) + - [cat](#cat) + - [Alignment](#alignment) + - [STAR](#star) + - [Alignment post-processing](#alignment-post-processing) + - [MarkDuplicates](#markduplicates) + - [SplitNCigarReads](#splitncigarreads) + - [Base (Quality Score) Recalibration](#base-quality-score-recalibration) + - [GATK BaseRecalibrator](#gatk-baserecalibrator) + - [GATK ApplyBQSR](#gatk-applybqsr) + - [Variant calling](#variant-calling) + - [Variant filtering](#variant-filtering) + - [Variant annotation](#variant-annotation) + - [snpEff](#snpeff) + - [VEP](#vep) + - [QC and Reporting](#qc-and-reporting) + - [QC](#qc) + - [FastQC](#fastqc) + - [GATK MarkDuplicates reports](#gatk-markduplicates-reports) + - [samtools stats](#samtools-stats) + - [snpEff reports](#snpeff-reports) + - [VEP reports](#vep-reports) + - [Reporting](#reporting) + - [MultiQC](#multiqc) + - [Pipeline information](#pipeline-information) + +## Preprocessing + +### cat + +
+Output files + +- `fastq/` + - `*.merged.fastq.gz`: If `--save_merged_fastq` is specified, concatenated FastQ files will be placed in this directory. + +
+ +If multiple libraries/runs have been provided for the same sample in the input samplesheet (e.g. to increase sequencing depth) then these will be merged at the very beginning of the pipeline in order to have consistent sample naming throughout the pipeline. Please refer to the [usage documentation](https://nf-co.re/rnavar/usage#samplesheet-input) to see how to specify these samples in the input samplesheet. + +## Alignment + +### STAR + +[STAR](https://github.com/alexdobin/STAR) is a read aligner designed for splice aware mapping typical of RNA sequencing data. STAR stands for *S*pliced *T*ranscripts *A*lignment to a *R*eference, and has been shown to have high accuracy and outperforms other aligners by more than a factor of 50 in mapping speed, but it is memory intensive. + +![MultiQC - STAR Alignment stats](images/alignment_star_mapping_stats.png) + +
+Output files + +- `preprocessing/[SAMPLE]/` + - `[SAMPLE].aligned.bam`: If `--save_align_intermeds` is specified the original BAM file containing read alignments to the reference genome will be placed in this directory. + - `[SAMPLE].aligned.bam.bai`: This is the index of the above \*.aligned.bam +- `preprocessing/[SAMPLE]/log` + - `[SAMPLE].Log.final.out`: STAR alignment report containing the mapping results summary. + - `[SAMPLE].Log.out` and `[SAMPLE].Log.progress.out`: STAR log files containing detailed information about the run. Typically only useful for debugging purposes. + - `[SAMPLE].SJ.out.tab`: File containing filtered splice junctions detected after mapping the reads. +- `preprocessing/[SAMPLE]/unmapped` + - `[SAMPLE].unmapped_*.fastq.gz`: If `--save_unaligned` is specified, FastQ files containing unmapped reads will be placed in this directory. +- `reports/stats/[SAMPLE]/` + - `[SAMPLE].aligned.bam.flagstat`: Samtools flagstat summary of the alignment + - `[SAMPLE].aligned.bam.stats`: Samtools stat output + +
+ +## Alignment post-processing + +### MarkDuplicates + +[GATK MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/360042477492-MarkDuplicates-Picard) locates and tags duplicate reads in a `BAM` file. The tool's main output is a new BAM file, in which duplicates have been identified in the SAM flags field for each read. + +![MultiQC - MarkDuplicate stats](images/read_markduplicates_stats.png) + +If desired, duplicates can be removed using the `--remove_duplicates true` option. + +
+Output files + +- `preprocessing/[SAMPLE]/` + - `[SAMPLE].markdup.sorted.bam`: Picard Markduplicate bam file. + - `[SAMPLE].markdup.sorted.bam.bai`: This is the index of the above \*.markdup.sorted.bam + +
+ +### SplitNCigarReads + +[GATK SplitNCigarReads](https://gatk.broadinstitute.org/hc/en-us/articles/360036858811-SplitNCigarReads) is executed for post-processing RNA reads aligned against the reference. The tool's main output is a new BAM file with reads split at N CIGAR elements and CIGAR strings updated. + +Currently, the pipeline does not produce the new BAM file in the output directory. + +### Base (Quality Score) Recalibration + +The base quality recalibration step runs by default and produces the recalibrated BAM file for variant calling, as described below. However, you can turn off this step by using the `--skip_baserecalibration true` option; in that case, no recalibrated BAM file is produced and the pipeline uses the un-recalibrated BAM file for variant calling. + +#### GATK BaseRecalibrator + +[GATK BaseRecalibrator](https://gatk.broadinstitute.org/hc/en-us/articles/360042477672-BaseRecalibrator) generates a recalibration table based on various co-variates. + +Currently, the pipeline does not produce the recalibration table file in the output directory. + +#### GATK ApplyBQSR + +[GATK ApplyBQSR](https://gatk.broadinstitute.org/hc/en-us/articles/360042476852-ApplyBQSR) recalibrates the base qualities of the input reads based on the recalibration table produced by the [GATK BaseRecalibrator](#gatk-baserecalibrator) tool. + +
+Output files + +- `preprocessing/[SAMPLE]/` + - `[SAMPLE].recal.bam`: Recalibrated bam file. + - `[SAMPLE].recal.bam.bai`: This is the index of the above recalibrated bam. + +
+ +## Variant calling + +[GATK HaplotypeCaller](https://gatk.broadinstitute.org/hc/en-us/articles/4414586765723-HaplotypeCaller) is used to call SNVs and small indels in the sample. The `Recalibrated BAM` file is used as an input to this process and the output file is produced in VCF format. + +
+Output files + +- `results/variant_calling/[SAMPLE]/` + - `[SAMPLE].haplotypecaller.vcf.gz`: Variant calls in VCF format. + - `[SAMPLE].haplotypecaller.vcf.gz.tbi`: This is the index of the above VCF file. + +
+ +## Variant filtering + +[GATK VariantFiltration](https://gatk.broadinstitute.org/hc/en-us/articles/360037434691-VariantFiltration) is used for hard-filtering variant calls based on certain criteria. Records are hard-filtered by changing the value in the FILTER field to something other than PASS. Filtered records will be preserved in the output. + +
+Output files + +- `results/variant_calling/[SAMPLE]/` + - `[SAMPLE].haplotypecaller.filtered.vcf.gz`: Variant VCF with updated FILTER field. + - `[SAMPLE].haplotypecaller.filtered.vcf.gz.tbi`: This is the index of the above VCF file. + +
+ +## Variant annotation + +This directory contains results from the final annotation steps: two tools are used for annotation, [snpEff](http://snpeff.sourceforge.net/) and [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html). + +### snpEff + +[snpeff](http://snpeff.sourceforge.net/) is a genetic variant annotation and effect prediction toolbox. +It annotates and predicts the effects of variants on genes (such as amino acid changes) using multiple databases for annotations. +The generated `VCF` header contains the software version and the used command line. + +To annotate variants using `snpeff`, you can use `--annotate_tools snpeff` or `--annotate_tools merge`. +The annotated variant files in VCF format can be found in `results/variant_annotation` folder. -### FastQC +![MultiQC - snpEff variant by region](images/snpeff_variants_by_region.png) +![MultiQC - snpEff variant by impact](images/snpeff_variants_by_impact.png) +![MultiQC - snpEff variant by effect types](images/snpeff_variants_by_effect_types.png) +![MultiQC - snpEff variant by quality](images/snpeff_variant_quality.png)
Output files -- `fastqc/` - - `*_fastqc.html`: FastQC report containing quality metrics. - - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. +- `results/variant_annotation/[SAMPLE]/` + - `[SAMPLE]_snpEff.ann.vcf.gz`: Annotated VCF from snpEff process. + - `[SAMPLE]_snpEff.ann.vcf.gz.tbi`: This is the index of the above VCF file.
+For further reading and documentation see the [snpEff manual](http://snpeff.sourceforge.net/SnpEff_manual.html#outputSummary) + +### VEP + +[VEP (Variant Effect Predictor)](https://www.ensembl.org/info/docs/tools/vep/index.html), based on `Ensembl`, is a tool to determine the effects of the variants. The generated `VCF` header contains the software version, also the version numbers for additional databases like `Clinvar` or `dbSNP` used in the `VEP` line. +The format of the [consequence annotations](https://www.ensembl.org/info/genome/variation/prediction/predicted_data.html) is also in the `VCF` header describing the `INFO` field. + +Currently, it contains: + +- `Consequence`: impact of the variation, if there is any +- `Codons`: the codon change, i.e. cGt/cAt +- `Amino_acids`: change in amino acids, i.e. R/H if there is any +- `Gene`: ENSEMBL gene name +- `SYMBOL`: gene symbol +- `Feature`: actual transcript name +- `EXON`: affected exon +- `PolyPhen`: prediction based on [PolyPhen](http://genetics.bwh.harvard.edu/pph2/) +- `SIFT`: prediction by [SIFT](http://sift.bii.a-star.edu.sg/) +- `Protein_position`: Relative position of amino acid in protein +- `BIOTYPE`: Biotype of transcript or regulatory feature + +To annotate variants using `vep`, you can use `--annotate_tools vep`. +The annotated variant files in VCF format can be found in `results/variant_annotation` folder. + +![MultiQC - VEP general statistics](images/vep_general_stats.png) +![MultiQC - VEP SIFT summary](images/vep_sift_summary.png) +![MultiQC - VEP Polyphen summary](images/vep_pph_summary.png) + +
+Output files + +- `results/variant_annotation/[SAMPLE]/` + - `[SAMPLE]_VEP.ann.vcf.gz`: Annotated VCF from VEP process. + - `[SAMPLE]_VEP.ann.vcf.gz.tbi`: This is the index of the above VCF file. + +
+ +When `--annotate_tools merge` option is used, the annotation from both `snpeff` and `vep` are combined into a single VCF file which can be found with the following naming convention. + +
+Output files + +- `results/variant_annotation/[SAMPLE]/` + - `[SAMPLE]_snpEff_VEP.ann.vcf.gz`: Combined annotation from both snpEff and VEP. + - `[SAMPLE]_snpEff_VEP.ann.vcf.gz.tbi`: This is the index of the above VCF file. + +
+ +For further reading and documentation see the [VEP manual](https://www.ensembl.org/info/docs/tools/vep/index.html) + +## QC and Reporting + +### QC + +#### FastQC + [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). -### MultiQC +Plots will show: + +- Sequence counts for each sample. +- Sequence Quality Histograms: The mean quality value across each base position in the read. +- Per Sequence Quality Scores: The number of reads with average quality scores. Shows if a subset of reads has poor quality. +- Per Base Sequence Content: The proportion of each base position for which each of the four normal DNA bases has been called. +- Per Sequence GC Content: The average GC content of reads. Normal random library typically have a roughly normal distribution of GC content. +- Per Base N Content: The percentage of base calls at each position for which an N was called. +- Sequence Length Distribution. +- Sequence Duplication Levels: The relative level of duplication found for every sequence. +- Overrepresented sequences: The total amount of overrepresented sequences found in each library. +- Adapter Content: The cumulative percentage count of the proportion of your library which has seen each of the adapter sequences at each position. 
+ +![MultiQC - FASTQC Sequence Counts ](images/fastqc_seq_counts.png) +![MultiQC - FASTQC Mean Quality Distribution](images/fastqc_mean_qual_score_dist.png) +![MultiQC - FASTQC GC content](images/fastqc_gc_content.png) +![MultiQC - FASTQC Read Length Distribution](images/fastqc_length_dist.png) +![MultiQC - FASTQC Overall summary](images/fastqc_overall_status.png) + +:::note +The FastQC plots displayed in the MultiQC report show _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. +::: + +#### GATK MarkDuplicates reports + +More information in the [GATK MarkDuplicates section](#markduplicates) + +Duplicates can arise during sample preparation _e.g._ library construction using PCR. +Duplicate reads can also result from a single amplification cluster, incorrectly detected as multiple clusters by the optical sensor of the sequencing instrument. These duplication artifacts are referred to as optical duplicates.
Output files -- `multiqc/` - - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. +- `reports/stats/[SAMPLE]/` + - `[SAMPLE].markdup.sorted.metrics`: Information about the number of duplicate reads in the sample.
-[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. +For further reading and documentation see the [MarkDuplicates manual](https://software.broadinstitute.org/gatk/documentation/tooldocs/4.1.2.0/picard_sam_markduplicates_MarkDuplicates.php). + +#### samtools stats + +[samtools stats](https://www.htslib.org/doc/samtools.html) collects statistics from `BAM` files and outputs in a text format. -Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . +Plots will show: + +- Alignment metrics. + +
+Output files + +- `results/reports/stats/[SAMPLE]/` + - `[SAMPLE].aligned.bam.flagstat`: Samtools flagstat output on raw alignment BAM. + - `[SAMPLE].aligned.bam.stats`: Samtools stats on raw alignment BAM. + - `[SAMPLE].markdup.sorted.bam.flagstat`: Samtools flagstat output on markduplicated BAM. + - `[SAMPLE].markdup.sorted.bam.stats`: Samtools stats on markduplicated BAM. + - `[SAMPLE].recal.bam.stats`: Samtools stats on recalibrated BAM. + +
+ +For further reading and documentation see the [`samtools` manual](https://www.htslib.org/doc/samtools.html#COMMANDS_AND_OPTIONS) + +#### snpEff reports + +[snpeff](http://snpeff.sourceforge.net/) is a genetic variant annotation and effect prediction toolbox. +It annotates and predicts the effects of variants on genes (such as amino acid changes) using multiple databases for annotations. + +Plots will show: + +- locations of detected variants in the genome and the number of variants for each location. +- the putative impact of detected variants and the number of variants for each impact. +- the effect of variants at protein level and the number of variants for each effect type. +- the number of variants as a function of the variant quality score. + +
+Output files + +- `results/reports/SnpEff/[SAMPLE]/` + - `[SAMPLE].csv`: Summary of variants by chromosome, region, effect, impact, functional class, type, etc. + - `[SAMPLE].genes.txt`: TXT (tab separated) summary counts for variants affecting each transcript and gene. + - `snpEff_summary.html`: Statistics with graphs to be viewed with a web browser + +
+ +#### VEP reports + +[VEP (Variant Effect Predictor)](https://www.ensembl.org/info/docs/tools/vep/index.html), based on `Ensembl`, is a tool to determine the effects of all sorts of variants, including SNPs, indels, structural variants, CNVs. + +
+Output files + +- `results/reports/EnsemblVEP/[SAMPLE]/` + - `[SAMPLE].summary.html`: Statistics with graphs to be viewed with a web browser + +
+ +For further reading and documentation see the [VEP manual](https://www.ensembl.org/info/docs/tools/vep/index.html) + +### Reporting + +#### MultiQC + +[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarizing all samples in your project. +Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. + +![MultiQC - Report View](images/multiqc_report.png) + +The pipeline has special steps which also allow the software versions to be reported in the `MultiQC` output for future traceability. + +For more information about how to use `MultiQC` reports, see [https://multiqc.info](https://multiqc.info). + +
+Output files + +- `reports/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats. + +
-### Pipeline information +## Pipeline information
Output files diff --git a/docs/usage.md b/docs/usage.md index 1bc76211..8e2f2abf 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -4,60 +4,12 @@ > _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ -## Introduction - - - -## Samplesheet input - -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. - -```bash ---input '[path to samplesheet file]' -``` - -### Multiple runs of the same sample - -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: - -```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz -``` - -### Full samplesheet - -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. - -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. 
- -```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, -``` - -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | - -An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. - ## Running the pipeline The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/rnavar --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker +nextflow run nf-core/rnavar --input ./samplesheet.csv --outdir ./results --genome GRCh38 -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. 
@@ -118,7 +70,170 @@ To further assist in reproducbility, you can use share and re-use [parameter fil If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. ::: -## Core Nextflow arguments +## Samplesheet input + +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row as shown in the examples below. + +```console +--input '[path to samplesheet file]' +``` + +### Multiple runs of the same sample + +The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: + +```console +sample,fastq_1,fastq_2,strandedness +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,unstranded +CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz,unstranded +CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz,unstranded +``` + +#### Full samplesheet + +The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 4 columns to match those defined in the table below. + +A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. 
+ +```console +sample,fastq_1,fastq_2,strandedness +CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,forward +CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz,forward +CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz,forward +TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz,,reverse +TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz,,reverse +TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,,reverse +TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,,reverse +``` + +| Column | Description | +| -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `strandedness` | Sample strand-specificity. Must be one of `unstranded`, `forward` or `reverse`. | + +An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. + +## PIPELINE PARAMETERS AND DESCRIPTION + +## Reference genome files + +The minimum reference genome requirements are a FASTA and GTF file, all other files required to run the pipeline can be generated from these files. However, it is more storage and compute friendly if you are able to re-use reference genome files as efficiently as possible. It is recommended to use the `--save_reference` parameter if you are using the pipeline to build new indices (e.g. 
those unavailable on [AWS iGenomes](https://nf-co.re/usage/reference_genomes)) so that you can save them somewhere locally. + +> **NB:** Compressed reference files are also supported by the pipeline i.e. standard files with the `.gz` extension and indices folders with the `tar.gz` extension. + +The index building step can be quite a time-consuming process; saving the indices permits their reuse in future runs of the pipeline and saves disk space. You can then either provide the appropriate reference genome files on the command-line via the appropriate parameters (e.g. `--star_index '/path/to/STAR/index/'`) or via a custom config file. + +> **NB:** If you are supplying a pre-built genome index file via `--star_index`, please ensure that the index has been generated with the latest STAR version i.e. v2.7.9a or above. If the pipeline finds an incompatible index, it will generate a new one using the reference genome, which will consume time and memory unnecessarily. + +- If `--genome` is provided then the FASTA and GTF files (and existing indices) will be automatically obtained from AWS-iGenomes unless these have already been downloaded locally in the path specified by `--igenomes_base`. +- If `--gff` is provided as input then this will be converted to a GTF file, or the latter will be used if both are provided. +- The `--exon_bed` parameter file is expected to be exon coordinates with at least three columns i.e., `<chr> <start> <end>` in the file. The `<start>` should be 0-based. If this parameter is not provided, the exon coordinates are extracted from the GTF file and a BED file is generated by the process `GTF2BED`. +- If `--star_index` is not provided then it will be generated from the reference genome FASTA file using `STAR --runMode genomeGenerate` command. + +> **NB:** If you are providing a GTF and/or a BED file, please ensure that the chromosomes and contigs in the files are also present in the genome FASTA (and in the .dict) file.
Otherwise `GATK BedToIntervalList` module is likely to fail if the chromosomes/contigs do not match with the reference genome data. + +### Recommendation when using very large genomes + +When the pipeline is used on very large genomes having chromosome size greater than 512Mb (e.g. Chromosome 1 of Monodelphis domestica has a size of 748055161bp), please make sure that `--bam_csi_index` parameter is provided in order to use coordinate sorted index (CSI) instead of standard binary alignment index (BAI). + +> **NB:** When `--bam_csi_index` is used, variant filtration step will be disabled as `GATK VariantFiltration` does not currently support CSI index for the input VCF. It may be incorporated in the future when newer GATK versions support CSI for VCF inputs. + +## Alignment options + +The pipeline uses [STAR](https://github.com/alexdobin/STAR) to map the raw FastQ reads to the reference genome. STAR is fast but requires a lot of memory to run, typically around 38GB for the Human GRCh37 reference genome. + +By default, STAR runs in `2-pass` mode. For the most sensitive novel junction discovery, it is recommended to run STAR in the 2-pass +mode. It does not increase the number of detected novel junctions, but allows more spliced reads mapping to novel junctions to be detected. The basic idea is to run 1st pass of STAR mapping with the usual parameters, then collect the junctions detected in the first pass, and use them as "annotated" junctions for the 2nd pass mapping. You can turn off this feature by setting `--star_twopass false` in command line. + +Read length is an important parameter therefore it has to be used carefully. The default is set to 150, but it has to be changed according to the input reads. For example, if the input read length is 2x151bp, then you use `--read_length 151`. The `--read_length` parameter is used while generating an index as well as in the alignment process.
In both processes, the pipeline passes (read_length - 1) to the STAR parameter `--sjdbOverhang` as recommended in STAR documentation. + +> **NB:** Read length `--read_length` is an important parameter, therefore it has to be set according to the input read length. If you are supplying a pre-built genome index, please make sure that you have used the same (read_length - 1) during the genomeGenerate step. + +STAR alignment generates a coordinate-sorted BAM file as output. The coordinate-sorting process can be very memory intensive when the input data is deep sequenced or the genome has many highly expressed loci. When the pipeline runs in a memory-constrained environment, the sorting step may fail due to low memory. In such cases you may adjust the limit parameters such as `--star_limitBAMsortRAM`, `--star_outBAMsortingBinsN` and `--star_limitOutSJcollapsed` to increase the sorting memory and genomic bins. Refer to the parameter documentation for the default values and adjust as appropriate based on your memory availability. + +## Preprocessing options + +Marking duplicate reads is performed using `GATK4 MarkDuplicates` tool. The tool does not remove duplicate reads by default, however you can set `--remove_duplicates true` to remove them. + +GATK best practices have been followed in this pipeline for RNA analysis, hence it uses GATK modules such as `SplitNCigarReads`, `BaseRecalibrator`, `ApplyBQSR`. The `BaseRecalibrator` process requires a VCF of known variant sites. ExAC, gnomAD, or dbSNP resources can be used as known sites of variation. You can supply the VCF and index files using parameters such as `--dbsnp`, `--dbsnp_tbi`, `--known_indels`, `--known_indels_tbi`. + +> **NB:** Base recalibration can be turned off using `--skip_baserecalibration true` option. This is useful when you are analyzing data from non-model organisms where no known variant datasets exist.
+ +`GATK SplitNCigarReads` is a very time-consuming step, therefore we made an attempt to break the GTF file into multiple chunks (scatters) using `GATK IntervalListTools` to run the process independently on each chunk in a parallel way to speed up the analysis. The default number of splits is set to 25, that means the GTF file is split into 25 smaller files and run `GATK SplitNCigarReads` on each of them in parallel. You can modify the number of splits using parameter `--gatk_interval_scatter_count`. + +## Variant calling and filtering + +`GATK HaplotypeCaller` is used for variant calling with default minimum phred-scaled confidence threshold as 20. This value can be changed using parameter `--gatk_hc_call_conf`. + +The pipeline runs a hard-filtering step on the variants by default. It does not filter out any variants, rather it flags i.e. PASS or other flags such as FS, QD, SnpCluster, etc. in FILTER column of the VCF. The following are the default filter criteria, however it can be changed using the respective parameters. + +- `--gatk_vf_cluster_size` is set to 3. It is the number of SNPs which make up a cluster. +- `--gatk_vf_window_size` is set to 35. The window size (in bases) in which to evaluate clustered SNPs. +- `--gatk_vf_fs_filter` is set to 30.0. Filter based on FisherStrand > 30.0. It is the Phred-scaled probability that there is strand bias at the site. +- `--gatk_vf_qd_filter` is set to 2.0 meaning filter variants if Quality By Depth filter is < 2.0. + +Variant filtering is an optional step. You can skip it using `--skip_variantfiltration` parameter. + +## Variant annotation + +The annotation of variants is performed using snpEff and VEP. The parameter to use is `--annotate_tools snpeff` or `--annotate_tools vep`. You can even run both snpEff and VEP using `--annotate_tools merge`, in this case the output VCF file will have both snpEff and VEP annotations combined.
+ +You can skip the variant annotation step using `--skip_variantannotation` parameter or without passing `--annotate_tools` options. + +### Annotation cache + +Both `snpEff` and `VEP` enable usage of cache. +If cache is available on the machine where `rnavar` is run, it is possible to run annotation using cache. +You need to specify the cache directory using `--snpeff_cache` and `--vep_cache` in the command lines or within configuration files. +The cache will only be used when `--annotation_cache` and cache directories are specified (either in command lines or in a configuration file). + +Example: + +```bash +nextflow run nf-core/rnavar --input samplesheet.csv --genome GRCh38 -profile docker --annotate_tools snpEff --snpeff_cache </path/to/snpEff/cache> --annotation_cache +nextflow run nf-core/rnavar --input samplesheet.csv --genome GRCh38 -profile docker --annotate_tools VEP --vep_cache </path/to/VEP/cache> --annotation_cache +``` + +### Download annotation cache + +A `Nextflow` helper script [link](https://raw.githubusercontent.com/nf-core/sarek/master/download_cache.nf) has been designed to help downloading `snpEff` and `VEP` caches. +Such files are meant to be shared between multiple users, so this script is mainly meant for people administrating servers, clusters and advanced users. + +```bash +nextflow run download_cache.nf --snpeff_cache </path/to/snpEff/cache> --snpeff_db <snpEff_DB_version> --genome <GENOME> +nextflow run download_cache.nf --vep_cache </path/to/VEP/cache> --species <species> --vep_cache_version <VEP_cache_version> --genome <GENOME> +``` + +### Using VEP CADD plugin + +To enable the use of the `VEP` `CADD` plugin: + +- Download the `CADD` files +- Specify them (either on the command line, like in the example or in a configuration file) +- use the `--cadd_cache` flag + +Example: + +```bash +nextflow run nf-core/rnavar --input samplesheet.csv --genome GRCh38 -profile docker --annotate_tools VEP --cadd_cache \ + --cadd_indels </path/to/CADD/InDels.tsv.gz> \ + --cadd_indels_tbi </path/to/CADD/InDels.tsv.gz.tbi> \ + --cadd_wg_snvs </path/to/CADD/whole_genome_SNVs.tsv.gz> \ + --cadd_wg_snvs_tbi </path/to/CADD/whole_genome_SNVs.tsv.gz.tbi> +``` + +### Downloading CADD files + +A helper script has been designed to help downloading `CADD` files.
+Such files are meant to be shared between multiple users, so this script is mainly meant for people administrating servers, clusters and advanced users. + +```bash +nextflow run download_cache.nf --cadd_cache --cadd_version --genome +``` + +## GENERAL NEXTFLOW ARGUMENTS :::note These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). @@ -171,11 +286,11 @@ You can also supply a run name to resume a specific run: `-resume [run-name]`. U Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. -## Custom configuration +## CUSTOM CONFIGURATIONS ### Resource requests -Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. +Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnavar/blob/master/conf/base.config#L17) it will automatically be resubmitted with higher requests (2 x original, then 3 x original).
If it still fails after the third attempt then the pipeline execution is stopped. To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. diff --git a/main.nf b/main.nf index 90e7fcb4..4d823ad6 100644 --- a/main.nf +++ b/main.nf @@ -11,25 +11,38 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS + GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - -include { RNAVAR } from './workflows/rnavar' -include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_rnavar_pipeline' -include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_rnavar_pipeline' -include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_rnavar_pipeline' +params.fasta = getGenomeAttribute('fasta') +params.fasta_fai = getGenomeAttribute('fasta_fai') +params.dict = getGenomeAttribute('dict') +params.gtf = getGenomeAttribute('gtf') +params.gff = getGenomeAttribute('gff') +params.exon_bed = getGenomeAttribute('exon_bed') +params.star_index = getGenomeAttribute('star') +params.dbsnp = getGenomeAttribute('dbsnp') +params.dbsnp_tbi = getGenomeAttribute('dbsnp_tbi') +params.known_indels = getGenomeAttribute('known_indels') +params.known_indels_tbi = getGenomeAttribute('known_indels_tbi') +params.snpeff_db = getGenomeAttribute('snpeff_db') +params.snpeff_genome = getGenomeAttribute('snpeff_genome') +params.vep_cache_version = getGenomeAttribute('vep_cache_version') +params.vep_genome = getGenomeAttribute('vep_genome') +params.vep_species = getGenomeAttribute('vep_species') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES + IMPORT FUNCTIONS / 
MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// TODO nf-core: Remove this line if you don't need a FASTA file -// This is an example of how to use getGenomeAttribute() to fetch parameters -// from igenomes.config using `--genome` -params.fasta = getGenomeAttribute('fasta') +include { RNAVAR } from './workflows/rnavar' +include { ANNOTATION_CACHE_INITIALISATION } from './subworkflows/local/annotation_cache_initialisation' +include { DOWNLOAD_CACHE_SNPEFF_VEP } from './subworkflows/local/download_cache_snpeff_vep' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_rnavar_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_rnavar_pipeline' +include { PREPARE_GENOME } from './subworkflows/local/prepare_genome' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -41,18 +54,124 @@ params.fasta = getGenomeAttribute('fasta') // WORKFLOW: Run main analysis pipeline depending on type of input // workflow NFCORE_RNAVAR { - take: - samplesheet // channel: samplesheet read in from --input + samplesheet main: + ch_versions = Channel.empty() + + // Initialize fasta file with meta map: + ch_fasta_raw = params.fasta ? Channel.fromPath(params.fasta).map{ it -> [ [id:it.baseName], it ] }.collect() : Channel.empty() + + // Initialize file channels based on params, defined in the params.genomes[params.genome] scope + ch_dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.value([]) + ch_known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([]) + ch_gff = params.gff ? Channel.fromPath(params.gff).collect() : Channel.empty() + ch_gtf_raw = params.gtf ? 
Channel.fromPath(params.gtf).map{ gtf -> [ [ id:gtf.baseName ], gtf ] }.collect() : Channel.empty() + + // Initialize variant annotation associated channels + snpeff_db = params.snpeff_db ?: Channel.empty() + vep_cache_version = params.vep_cache_version ?: Channel.empty() + vep_genome = params.vep_genome ?: Channel.empty() + vep_species = params.vep_species ?: Channel.empty() + + seq_platform = params.seq_platform ?: [] + seq_center = params.seq_center ?: [] + + // Initialize value channels based on params, defined in the params.genomes[params.genome] scope + snpeff_db = params.snpeff_db ?: Channel.empty() + vep_cache_version = params.vep_cache_version ?: Channel.empty() + vep_genome = params.vep_genome ?: Channel.empty() + vep_species = params.vep_species ?: Channel.empty() + + vep_extra_files = [] + + if (params.dbnsfp && params.dbnsfp_tbi) { + vep_extra_files.add(file(params.dbnsfp, checkIfExists: true)) + vep_extra_files.add(file(params.dbnsfp_tbi, checkIfExists: true)) + } + + if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && params.spliceai_indel_tbi) { + vep_extra_files.add(file(params.spliceai_indel, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_indel_tbi, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_snv, checkIfExists: true)) + vep_extra_files.add(file(params.spliceai_snv_tbi, checkIfExists: true)) + } + + PREPARE_GENOME( + ch_fasta_raw, + ch_gff, + ch_gtf_raw, + ch_dbsnp, + ch_known_indels, + params.feature_type) + + ch_fasta = PREPARE_GENOME.out.fasta + ch_star_index = PREPARE_GENOME.out.star_index + ch_gtf = PREPARE_GENOME.out.gtf + ch_dict = params.dict ? Channel.fromPath(params.dict).map{ it -> [ [id:'dict'], it ] }.collect() + : PREPARE_GENOME.out.dict + ch_fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai) + : PREPARE_GENOME.out.fasta_fai + ch_exon_bed = params.exon_bed ? 
Channel.fromPath(params.exon_bed).map{ it -> [ [id:'exon_bed'], it ] }.collect() + : PREPARE_GENOME.out.exon_bed + ch_dbsnp_tbi = params.dbsnp ? params.dbsnp_tbi ? Channel.fromPath(params.dbsnp_tbi).collect() + : PREPARE_GENOME.out.dbsnp_tbi : Channel.value([]) + ch_known_indels_tbi = params.known_indels ? params.known_indels_tbi ? Channel.fromPath(params.known_indels_tbi).collect() + : PREPARE_GENOME.out.known_indels_tbi : Channel.value([]) + + // Download cache + if (params.download_cache) { + // Assuming that even if the cache is provided, if the user specify download_cache, rnavar will download the cache + ensemblvep_info = Channel.of([ [ id:"${params.vep_cache_version}_${params.vep_genome}" ], params.vep_genome, params.vep_species, params.vep_cache_version ]) + snpeff_info = Channel.of([ [ id:"${params.snpeff_genome}.${params.snpeff_db}" ], params.snpeff_genome, params.snpeff_db ]) + DOWNLOAD_CACHE_SNPEFF_VEP(ensemblvep_info, snpeff_info) + snpeff_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.snpeff_cache + vep_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.ensemblvep_cache.map{ meta, cache -> [ cache ] } + + ch_versions = ch_versions.mix(DOWNLOAD_CACHE_SNPEFF_VEP.out.versions) + } else { + // Looks for cache information either locally or on the cloud + ANNOTATION_CACHE_INITIALISATION( + (params.snpeff_cache && params.annotate_tools && (params.annotate_tools.split(',').contains("snpeff") || params.annotate_tools.split(',').contains('merge'))), + params.snpeff_cache, + params.snpeff_genome, + params.snpeff_db, + (params.vep_cache && params.annotate_tools && (params.annotate_tools.split(',').contains("vep") || params.annotate_tools.split(',').contains('merge'))), + params.vep_cache, + params.vep_species, + params.vep_cache_version, + params.vep_genome, + "Please refer to https://nf-co.re/rnavar/docs/usage/#how-to-customise-snpeff-and-vep-annotation for more information.") + + snpeff_cache = ANNOTATION_CACHE_INITIALISATION.out.snpeff_cache + vep_cache = 
ANNOTATION_CACHE_INITIALISATION.out.ensemblvep_cache + } + // // WORKFLOW: Run pipeline // - RNAVAR ( - samplesheet - ) + RNAVAR(samplesheet, + ch_dbsnp, + ch_dbsnp_tbi, + ch_dict, + ch_exon_bed, + ch_fasta, + ch_fasta_fai, + ch_gtf, + ch_known_indels, + ch_known_indels_tbi, + ch_star_index, + snpeff_cache, + params.vep_genome, + params.vep_species, + params.vep_cache_version, + vep_cache, + vep_extra_files, + seq_center, + seq_platform) + emit: multiqc_report = RNAVAR.out.multiqc_report // channel: /path/to/multiqc_report.html } @@ -80,9 +199,8 @@ workflow { // // WORKFLOW: Run main workflow // - NFCORE_RNAVAR ( - PIPELINE_INITIALISATION.out.samplesheet - ) + NFCORE_RNAVAR(PIPELINE_INITIALISATION.out.samplesheet) + // // SUBWORKFLOW: Run completion tasks // @@ -97,6 +215,25 @@ workflow { ) } +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Get attribute from genome config file e.g. 
fasta +// + +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END diff --git a/modules.json b/modules.json old mode 100644 new mode 100755 index db4ad174..c3395b67 --- a/modules.json +++ b/modules.json @@ -5,20 +5,230 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "bcftools/annotate": { + "branch": "master", + "git_sha": "2ad29c2aed06d815d9f68ad7ba20b3b1c574ce9c", + "installed_by": ["modules"] + }, + "bcftools/concat": { + "branch": "master", + "git_sha": "b42fec6f7c6e5d0716685cabb825ef6bf6e386b5", + "installed_by": ["vcf_annotate_ensemblvep", "vcf_annotate_snpeff"] + }, + "bcftools/pluginscatter": { + "branch": "master", + "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", + "installed_by": ["vcf_annotate_ensemblvep", "vcf_annotate_snpeff"] + }, + "bcftools/sort": { + "branch": "master", + "git_sha": "b42fec6f7c6e5d0716685cabb825ef6bf6e386b5", + "installed_by": ["vcf_annotate_ensemblvep", "vcf_annotate_snpeff"] + }, + "bedtools/merge": { + "branch": "master", + "git_sha": "a5377837fe9013bde89de8689829e83e84086536", + "installed_by": ["modules"] + }, + "bedtools/sort": { + "branch": "master", + "git_sha": "571a5feac4c9ce0a8df0bc15b94230e7f3e8db47", + "installed_by": ["modules"] + }, + "cat/fastq": { + "branch": "master", + "git_sha": "4fc983ad0b30e6e32696fa7d980c76c7bfe1c03e", + "installed_by": ["modules"] + }, + "ensemblvep/download": { + "branch": "master", + "git_sha": "3db4f8488315cd7d7cf3fcb64251f6603210e831", + "installed_by": ["modules"] + }, + "ensemblvep/vep": { + "branch": "master", + "git_sha": "b42fec6f7c6e5d0716685cabb825ef6bf6e386b5", + "installed_by": ["modules", "vcf_annotate_ensemblvep"] + }, "fastqc": { 
"branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, + "gatk4/applybqsr": { + "branch": "master", + "git_sha": "af273ea6618c50e82c372abe18b0a225e84fe6f7", + "installed_by": ["modules"] + }, + "gatk4/baserecalibrator": { + "branch": "master", + "git_sha": "8a223e11d4e6deb36484e01891eae9c1cacb5f5d", + "installed_by": ["modules"] + }, + "gatk4/bedtointervallist": { + "branch": "master", + "git_sha": "d3f215802f696f7993f25c759781d2db91232015", + "installed_by": ["modules"] + }, + "gatk4/combinegvcfs": { + "branch": "master", + "git_sha": "fe3d7aac1b62efe4fd05cc9629d192e9cdfc3ccb", + "installed_by": ["modules"] + }, + "gatk4/createsequencedictionary": { + "branch": "master", + "git_sha": "e6fe277739f5894711405af3e717b2470bd956b5", + "installed_by": ["modules"] + }, + "gatk4/haplotypecaller": { + "branch": "master", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", + "installed_by": ["modules"] + }, + "gatk4/indexfeaturefile": { + "branch": "master", + "git_sha": "108e1f5bafc045ac19890852a41e8d95ae12aa67", + "installed_by": ["modules"] + }, + "gatk4/intervallisttools": { + "branch": "master", + "git_sha": "afb604624abcc2fc67a43d70a2de369a50d16105", + "installed_by": ["modules"] + }, + "gatk4/mergevcfs": { + "branch": "master", + "git_sha": "194fca815cf594646e638fa5476acbcc296f1850", + "installed_by": ["modules"] + }, + "gatk4/splitncigarreads": { + "branch": "master", + "git_sha": "1e15ad71d9022293985dd02e24186e5e496a0b3c", + "installed_by": ["modules"] + }, + "gatk4/variantfiltration": { + "branch": "master", + "git_sha": "00409369b2aff69bded4f0a9e7a80e17d933b426", + "installed_by": ["modules"] + }, + "gffread": { + "branch": "master", + "git_sha": "6c996d7fbe0816dcbb68ce587ad5f873313682a1", + "installed_by": ["modules"] + }, + "gunzip": { + "branch": "master", + "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", + "installed_by": ["modules"] + }, + "mosdepth": { + "branch": "master", + "git_sha": 
"e0616fba0919adb190bfe070d17fb12d76ba3a26", + "installed_by": ["modules"] + }, "multiqc": { "branch": "master", "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", "installed_by": ["modules"] + }, + "picard/markduplicates": { + "branch": "master", + "git_sha": "1943aa60f7490c3d6740e8872e6e69122ccc8087", + "installed_by": ["bam_markduplicates_picard"] + }, + "samtools/faidx": { + "branch": "master", + "git_sha": "f153f1f10e1083c49935565844cccb7453021682", + "installed_by": ["modules"] + }, + "samtools/flagstat": { + "branch": "master", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["bam_stats_samtools", "modules"] + }, + "samtools/idxstats": { + "branch": "master", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["bam_stats_samtools", "modules"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["bam_markduplicates_picard", "bam_sort_stats_samtools", "modules"] + }, + "samtools/merge": { + "branch": "master", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["modules"] + }, + "samtools/sort": { + "branch": "master", + "git_sha": "4352dbdb09ec40db71e9b172b97a01dcf5622c26", + "installed_by": ["bam_sort_stats_samtools", "modules"] + }, + "samtools/stats": { + "branch": "master", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["bam_stats_samtools", "modules"] + }, + "snpeff/download": { + "branch": "master", + "git_sha": "214d575774c172062924ad3564b4f66655600730", + "installed_by": ["modules"] + }, + "snpeff/snpeff": { + "branch": "master", + "git_sha": "2f3db6f45147ebbb56b371536e31bdf622b5bfee", + "installed_by": ["modules", "vcf_annotate_snpeff"] + }, + "star/align": { + "branch": "master", + "git_sha": "a21faa6a3481af92a343a10926f59c189a2c16c9", + "installed_by": ["fastq_align_star", "modules"] + }, + "star/genomegenerate": { + "branch": "master", + "git_sha": 
"a21faa6a3481af92a343a10926f59c189a2c16c9", + "installed_by": ["modules"] + }, + "tabix/bgziptabix": { + "branch": "master", + "git_sha": "09d3c8c29b31a2dfd610305b10550f0e1dbcd4a9", + "installed_by": ["modules", "vcf_annotate_snpeff"] + }, + "tabix/tabix": { + "branch": "master", + "git_sha": "9502adb23c0b97ed8e616bbbdfa73b4585aec9a1", + "installed_by": ["modules", "vcf_annotate_ensemblvep"] + }, + "untar": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["modules"] } } }, "subworkflows": { "nf-core": { + "bam_markduplicates_picard": { + "branch": "master", + "git_sha": "1943aa60f7490c3d6740e8872e6e69122ccc8087", + "installed_by": ["subworkflows"] + }, + "bam_sort_stats_samtools": { + "branch": "master", + "git_sha": "4352dbdb09ec40db71e9b172b97a01dcf5622c26", + "installed_by": ["fastq_align_star", "subworkflows"] + }, + "bam_stats_samtools": { + "branch": "master", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["bam_markduplicates_picard", "bam_sort_stats_samtools"] + }, + "fastq_align_star": { + "branch": "master", + "git_sha": "1d1d7df613ff53223259c14185858cd742cd4743", + "installed_by": ["subworkflows"] + }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "3aa0aec1d52d492fe241919f0c6100ebf0074082", @@ -33,6 +243,16 @@ "branch": "master", "git_sha": "bbd5a41f4535a8defafe6080e00ea74c45f4f96c", "installed_by": ["subworkflows"] + }, + "vcf_annotate_ensemblvep": { + "branch": "master", + "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", + "installed_by": ["subworkflows"] + }, + "vcf_annotate_snpeff": { + "branch": "master", + "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", + "installed_by": ["subworkflows"] } } } diff --git a/modules/local/gtf2bed/main.nf b/modules/local/gtf2bed/main.nf new file mode 100755 index 00000000..b18a63d4 --- /dev/null +++ b/modules/local/gtf2bed/main.nf @@ -0,0 +1,41 @@ +process GTF2BED { + tag "$gtf" + label 'process_low' + + conda 
"conda-forge::r-base=3.5.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/r-base:3.5.0' : + 'biocontainers/r-base:3.5.0'}" + + input: + tuple val(meta), path(gtf) + val feature_type + + output: + path '*.bed' , emit: bed + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/rnaseq/bin/ + def allowed_type = ["exon", "transcript", "gene"]; + if (feature_type){ + feature_type = allowed_type.contains(feature_type) ? feature_type : "exon" + } + """ + Rscript --no-save -<<'RCODE' + gtf = read.table("${gtf}", sep="\t") + gtf = subset(gtf, V3 == "${feature_type}") + write.table(data.frame(chrom=gtf[,'V1'], start=gtf[,'V4'], end=gtf[,'V5']), "tmp.exome.bed", quote = F, sep="\t", col.names = F, row.names = F) + RCODE + + awk '{print \$1 "\t" (\$2 - 1) "\t" \$3}' tmp.exome.bed > exome.bed + rm -rf tmp.exome.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + Rscript: \$(echo \$(Rscript --version 2>&1) | sed 's/R scripting front-end version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/annotate/environment.yml b/modules/nf-core/bcftools/annotate/environment.yml new file mode 100644 index 00000000..e0abc8d2 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_annotate +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/annotate/main.nf b/modules/nf-core/bcftools/annotate/main.nf new file mode 100644 index 00000000..64e7ec76 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/main.nf @@ -0,0 +1,65 @@ +process BCFTOOLS_ANNOTATE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" + + input: + tuple val(meta), path(input), path(index), path(annotations), path(annotations_index), path(header_lines) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def header_file = header_lines ? "--header-lines ${header_lines}" : '' + def annotations_file = annotations ? "--annotations ${annotations}" : '' + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + if ("$input" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + """ + bcftools \\ + annotate \\ + $args \\ + $annotations_file \\ + $header_file \\ + --output ${prefix}.${extension} \\ + --threads $task.cpus \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + + def create_cmd = extension.endsWith(".gz") ? 
"echo '' | gzip >" : "touch" + """ + ${create_cmd} ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/annotate/meta.yml b/modules/nf-core/bcftools/annotate/meta.yml new file mode 100644 index 00000000..f3aa463b --- /dev/null +++ b/modules/nf-core/bcftools/annotate/meta.yml @@ -0,0 +1,56 @@ +name: bcftools_annotate +description: Add or remove annotations. +keywords: + - bcftools + - annotate + - vcf + - remove + - add +tools: + - annotate: + description: Add or remove annotations. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: https://samtools.github.io/bcftools/bcftools.html#annotate + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: Query VCF or BCF file, can be either uncompressed or compressed + - index: + type: file + description: Index of the query VCF or BCF file + - annotations: + type: file + description: Bgzip-compressed file with annotations + - annotations_index: + type: file + description: Index of the annotations file + - header_lines: + type: file + description: Contains lines to append to the output VCF header +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Compressed annotated VCF file + pattern: "*{vcf,vcf.gz,bcf,bcf.gz}" +authors: + - "@projectoriented" + - "@ramprasadn" +maintainers: + - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/annotate/tests/bcf.config b/modules/nf-core/bcftools/annotate/tests/bcf.config new file mode 100644 index 00000000..b8496b33 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/bcf.config @@ -0,0 +1,6 @@ +process { + withName: 'BCFTOOLS_ANNOTATE' { + ext.args = "-x ID,INFO/DP,FORMAT/DP --output-type u" + ext.prefix = { "${meta.id}_ann" } + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/annotate/tests/main.nf.test b/modules/nf-core/bcftools/annotate/tests/main.nf.test new file mode 100644 index 00000000..9abf9475 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/main.nf.test @@ -0,0 +1,108 @@ +nextflow_process { + + name "Test Process BCFTOOLS_ANNOTATE" + script "../main.nf" + process "BCFTOOLS_ANNOTATE" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/annotate" + + test("sarscov2 - [vcf, tbi, vcf2, tbi2, []] - vcf_output") { + + config "./vcf.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.versions + ).match("vcf") } + ) + } + + } + + + test("sarscov2 - [vcf, [], [], [], header] - bcf_output") { + + config "./bcf.config" + + when { + process { + """ + vcf = Channel.of([ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + [], + [], + [] + ]) + + header = Channel.of( + '##INFO=', + '##INFO=' + ) + .collectFile(name:"headers.vcf", newLine:true) + + input[0] = vcf.combine(header) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.versions + ).match("bcf") } + ) + } + + } + + test("sarscov2 - [vcf, tbi, vcf2, tbi2, []] - stub") { + + config "./vcf.config" + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("stub") } + ) + } + + } + +} diff --git a/modules/nf-core/bcftools/annotate/tests/main.nf.test.snap b/modules/nf-core/bcftools/annotate/tests/main.nf.test.snap new file mode 100644 index 00000000..72b06878 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/main.nf.test.snap @@ -0,0 +1,79 @@ +{ + "bcf": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.bcf" + ] + ], + [ + "versions.yml:md5,ea2b798edb5c1c0d46938f313a97a901" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-05-03T09:21:58.526449656" + }, + 
"vcf": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz" + ] + ], + [ + "versions.yml:md5,ea2b798edb5c1c0d46938f313a97a901" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-05-02T16:47:47.127427579" + }, + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,ea2b798edb5c1c0d46938f313a97a901" + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,ea2b798edb5c1c0d46938f313a97a901" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-05-02T16:43:43.707474961" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/annotate/tests/tags.yml b/modules/nf-core/bcftools/annotate/tests/tags.yml new file mode 100644 index 00000000..f97a1afc --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/annotate: + - "modules/nf-core/bcftools/annotate/**" diff --git a/modules/nf-core/bcftools/annotate/tests/vcf.config b/modules/nf-core/bcftools/annotate/tests/vcf.config new file mode 100644 index 00000000..cb809f61 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/vcf.config @@ -0,0 +1,6 @@ +process { + withName: 'BCFTOOLS_ANNOTATE' { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "-x ID,INFO/DP,FORMAT/DP --output-type z" + } +} diff --git a/modules/nf-core/bcftools/concat/environment.yml b/modules/nf-core/bcftools/concat/environment.yml new file mode 100644 index 00000000..ff0200df --- /dev/null +++ b/modules/nf-core/bcftools/concat/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_concat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/concat/main.nf 
b/modules/nf-core/bcftools/concat/main.nf new file mode 100644 index 00000000..d64640b3 --- /dev/null +++ b/modules/nf-core/bcftools/concat/main.nf @@ -0,0 +1,46 @@ +process BCFTOOLS_CONCAT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" + + input: + tuple val(meta), path(vcfs), path(tbi) + + output: + tuple val(meta), path("*.gz"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + bcftools concat \\ + --output ${prefix}.vcf.gz \\ + $args \\ + --threads $task.cpus \\ + ${vcfs} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/concat/meta.yml b/modules/nf-core/bcftools/concat/meta.yml new file mode 100644 index 00000000..91cb54d5 --- /dev/null +++ b/modules/nf-core/bcftools/concat/meta.yml @@ -0,0 +1,51 @@ +name: bcftools_concat +description: Concatenate VCF files +keywords: + - variant calling + - concat + - bcftools + - VCF +tools: + - concat: + description: | + Concatenate VCF files. 
+ homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcfs: + type: list + description: | + List containing 2 or more vcf files + e.g. [ 'file1.vcf', 'file2.vcf' ] + - tbi: + type: list + description: | + List containing 2 or more index files (optional) + e.g. [ 'file1.tbi', 'file2.tbi' ] +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF concatenated output file + pattern: "*.{vcf.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@nvnieuwk" +maintainers: + - "@abhi18av" + - "@nvnieuwk" diff --git a/modules/nf-core/bcftools/concat/tests/main.nf.test b/modules/nf-core/bcftools/concat/tests/main.nf.test new file mode 100644 index 00000000..d5d9f787 --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/main.nf.test @@ -0,0 +1,99 @@ +nextflow_process { + + name "Test Process BCFTOOLS_CONCAT" + script "../main.nf" + process "BCFTOOLS_CONCAT" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/concat" + + config "./nextflow.config" + + test("sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]]") { + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + ], + [ + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true) + ] + ] + """ + } + } + + 
then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [[vcf1, vcf2], []]") { + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + ], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - stub") { + + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + ], + [ + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bcftools/concat/tests/main.nf.test.snap b/modules/nf-core/bcftools/concat/tests/main.nf.test.snap new file mode 100644 index 00000000..90d3fc06 --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/main.nf.test.snap @@ -0,0 +1,101 @@ +{ + "sarscov2 - [[vcf1, vcf2], []]": { + "content": [ + { + "0": [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,5f6796c3ae109a1a5b87353954693f5a" + ] + ], + "1": [ + "versions.yml:md5,24ae05eb858733b40fbd3f89743a6d09" + ], + "vcf": [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,5f6796c3ae109a1a5b87353954693f5a" + ] + ], + "versions": [ + "versions.yml:md5,24ae05eb858733b40fbd3f89743a6d09" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "24.03.0" + }, + "timestamp": "2024-05-03T13:22:49.766749302" + }, + "sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]]": { + "content": [ + { + "0": [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,5f6796c3ae109a1a5b87353954693f5a" + ] + ], + "1": [ + "versions.yml:md5,24ae05eb858733b40fbd3f89743a6d09" + ], + "vcf": [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,5f6796c3ae109a1a5b87353954693f5a" + ] + ], + "versions": [ + "versions.yml:md5,24ae05eb858733b40fbd3f89743a6d09" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-05-03T13:22:41.119912312" + }, + "sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,24ae05eb858733b40fbd3f89743a6d09" + ], + "vcf": [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,24ae05eb858733b40fbd3f89743a6d09" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-05-03T13:22:59.90466392" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/concat/tests/nextflow.config b/modules/nf-core/bcftools/concat/tests/nextflow.config new file mode 100644 index 00000000..f3e1e98c --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = "--no-version" +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/concat/tests/tags.yml b/modules/nf-core/bcftools/concat/tests/tags.yml new file mode 100644 index 00000000..21710d4e --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/concat: + - "modules/nf-core/bcftools/concat/**" diff --git a/modules/nf-core/bcftools/pluginscatter/environment.yml b/modules/nf-core/bcftools/pluginscatter/environment.yml new file mode 100644 index 00000000..42e97daa --- /dev/null 
+++ b/modules/nf-core/bcftools/pluginscatter/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_pluginscatter +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/pluginscatter/main.nf b/modules/nf-core/bcftools/pluginscatter/main.nf new file mode 100644 index 00000000..9f52e947 --- /dev/null +++ b/modules/nf-core/bcftools/pluginscatter/main.nf @@ -0,0 +1,71 @@ +process BCFTOOLS_PLUGINSCATTER { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + val(sites_per_chunk) + val(scatter) + path(scatter_file) + path(regions) + path(targets) + + output: + tuple val(meta), path("*{vcf,vcf.gz,bcf,bcf.gz}") , emit: scatter + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def mandatory_arg = sites_per_chunk ? "--nsites-per-chunk ${sites_per_chunk}" : scatter ? "--scatter ${scatter}" : "--scatter-file ${scatter_file}" + def regions_arg = regions ? "--regions-file ${regions}" : "" + def targets_arg = targets ? "--targets-file ${targets}" : "" + """ + bcftools plugin scatter \\ + ${vcf} \\ + ${mandatory_arg} \\ + ${regions_arg} \\ + ${targets_arg} \\ + --output ${prefix} \\ + --prefix ${prefix} \\ + --threads ${task.cpus} \\ + ${args} + + mv ${prefix}/* . 
+ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + + """ + touch ${prefix}1.${extension} + touch ${prefix}2.${extension} + touch ${prefix}3.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/pluginscatter/meta.yml b/modules/nf-core/bcftools/pluginscatter/meta.yml new file mode 100644 index 00000000..1a716ead --- /dev/null +++ b/modules/nf-core/bcftools/pluginscatter/meta.yml @@ -0,0 +1,72 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "bcftools_pluginscatter" +description: Split VCF by chunks or regions, creating multiple VCFs. +keywords: + - scatter + - vcf + - bcf + - genomics +tools: + - "pluginscatter": + description: | + Split VCF by chunks or regions, creating multiple VCFs. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://samtools.github.io/bcftools/bcftools.html#reheader + doi: 10.1093/gigascience/giab008 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - vcf: + type: file + description: The input VCF to scatter + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - tbi: + type: file + description: Optional index of the input VCF + pattern: "*.tbi" + - sites_per_chunk: + type: integer + description: | + How many variants should be in each output file + Either this or `scatter` or `scatter_file` have to be given + - scatter: + type: string + description: | + A comma delimited list of regions to scatter into + Either this or `sites_per_chunk` or `scatter_file` have to be given + - scatter_file: + type: file + description: | + A file containing a region on each line with an optional second column containing the filename + Either this or `sites_per_chunk` or `scatter` have to be given + - regions: + type: file + description: Optional file containing the regions to work on + pattern: "*.bed" + - targets: + type: file + description: Optional file containing the regions to work on (but streams instead of index-jumping) + pattern: "*.bed" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - scatter: + type: file + description: The resulting files of the scattering + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/bcftools/sort/environment.yml b/modules/nf-core/bcftools/sort/environment.yml new file mode 100644 index 00000000..89cf911d --- /dev/null +++ b/modules/nf-core/bcftools/sort/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_sort +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/sort/main.nf b/modules/nf-core/bcftools/sort/main.nf new file mode 100644 index 00000000..246148d6 --- /dev/null +++ b/modules/nf-core/bcftools/sort/main.nf @@ -0,0 +1,61 @@ +process BCFTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}") , emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + + """ + bcftools \\ + sort \\ + --output ${prefix}.${extension} \\ + --temp-dir . 
\\ + $args \\ + $vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + """ + ${create_cmd} ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/sort/meta.yml b/modules/nf-core/bcftools/sort/meta.yml new file mode 100644 index 00000000..84747c6d --- /dev/null +++ b/modules/nf-core/bcftools/sort/meta.yml @@ -0,0 +1,42 @@ +name: bcftools_sort +description: Sorts VCF files +keywords: + - sorting + - VCF + - variant calling +tools: + - sort: + description: Sort VCF files by coordinates. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + tool_dev_url: https://github.com/samtools/bcftools + doi: "10.1093/bioinformatics/btp352" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: The VCF/BCF file to be sorted + pattern: "*.{vcf.gz,vcf,bcf}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Sorted VCF file + pattern: "*.{vcf.gz}" +authors: + - "@Gwennid" +maintainers: + - "@Gwennid" diff --git a/modules/nf-core/bcftools/sort/tests/main.nf.test b/modules/nf-core/bcftools/sort/tests/main.nf.test new file mode 100644 index 00000000..8a496dda --- /dev/null +++ b/modules/nf-core/bcftools/sort/tests/main.nf.test @@ -0,0 +1,54 @@ +nextflow_process { + + name "Test Process BCFTOOLS_SORT" + script "../main.nf" + process "BCFTOOLS_SORT" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/sort" + + test("sarscov2 - vcf") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("vcf") } + ) + } + + } + + test("sarscov2 - vcf - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/nf-core/bcftools/sort/tests/main.nf.test.snap b/modules/nf-core/bcftools/sort/tests/main.nf.test.snap new file mode 100644 index 00000000..6200cc42 --- /dev/null +++ b/modules/nf-core/bcftools/sort/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "vcf": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + "versions.yml:md5,622bd32d4ff0fac3360cd534ae0f0168" + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,622bd32d4ff0fac3360cd534ae0f0168" + ] + } + ], + 
"meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-05-02T16:55:21.237927554" + }, + "sarscov2 - vcf - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,622bd32d4ff0fac3360cd534ae0f0168" + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,622bd32d4ff0fac3360cd534ae0f0168" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-05-03T12:32:50.506309198" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/sort/tests/tags.yml b/modules/nf-core/bcftools/sort/tests/tags.yml new file mode 100644 index 00000000..6e9520dd --- /dev/null +++ b/modules/nf-core/bcftools/sort/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/sort: + - "modules/nf-core/bcftools/sort/**" diff --git a/modules/nf-core/bedtools/merge/environment.yml b/modules/nf-core/bedtools/merge/environment.yml new file mode 100644 index 00000000..99707878 --- /dev/null +++ b/modules/nf-core/bedtools/merge/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_merge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/merge/main.nf b/modules/nf-core/bedtools/merge/main.nf new file mode 100644 index 00000000..5310647d --- /dev/null +++ b/modules/nf-core/bedtools/merge/main.nf @@ -0,0 +1,47 @@ +process BEDTOOLS_MERGE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + tuple val(meta), path(bed) + + output: + tuple val(meta), path('*.bed'), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$bed" == "${prefix}.bed") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + bedtools \\ + merge \\ + -i $bed \\ + $args \\ + > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/merge/meta.yml b/modules/nf-core/bedtools/merge/meta.yml new file mode 100644 index 00000000..d7463e3d --- /dev/null +++ b/modules/nf-core/bedtools/merge/meta.yml @@ -0,0 +1,45 @@ +name: bedtools_merge +description: combines overlapping or “book-ended” features in an interval file into a single feature which spans all of the combined features. +keywords: + - bed + - merge + - bedtools + - overlapped bed +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/merge.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bed: + type: file + description: Input BED file + pattern: "*.{bed}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bed: + type: file + description: Overlapped bed file with combined features + pattern: "*.{bed}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" +maintainers: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" diff --git a/modules/nf-core/bedtools/merge/tests/main.nf.test b/modules/nf-core/bedtools/merge/tests/main.nf.test new file mode 100644 index 00000000..95dba8e5 --- /dev/null +++ b/modules/nf-core/bedtools/merge/tests/main.nf.test @@ -0,0 +1,34 @@ +nextflow_process { + + name "Test Process BEDTOOLS_MERGE" + script "../main.nf" + config "./nextflow.config" + process "BEDTOOLS_MERGE" + + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/merge" + + test("test_bedtools_merge") { + + when { + process { + """ + input[0] = [ [ id:'test'], + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/merge/tests/main.nf.test.snap b/modules/nf-core/bedtools/merge/tests/main.nf.test.snap new file mode 100644 index 00000000..ee6c4e63 --- /dev/null +++ b/modules/nf-core/bedtools/merge/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "test_bedtools_merge": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_out.bed:md5,0cf6ed2b6f470cd44a247da74ca4fe4e" + ] + ], + "1": [ + "versions.yml:md5,2d134badb4cd1e4e903696c7967f28d6" + ], + "bed": [ + [ + { + "id": "test" + }, + "test_out.bed:md5,0cf6ed2b6f470cd44a247da74ca4fe4e" + ] + ], + "versions": [ + "versions.yml:md5,2d134badb4cd1e4e903696c7967f28d6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T17:07:09.721153" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/bedtools/merge/tests/nextflow.config b/modules/nf-core/bedtools/merge/tests/nextflow.config new file mode 100644 index 00000000..16444e98 --- /dev/null +++ b/modules/nf-core/bedtools/merge/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: BEDTOOLS_MERGE { + ext.prefix = { "${meta.id}_out" } + } + +} diff --git a/modules/nf-core/bedtools/merge/tests/tags.yml b/modules/nf-core/bedtools/merge/tests/tags.yml new file mode 100644 index 00000000..60c8cad1 --- /dev/null +++ b/modules/nf-core/bedtools/merge/tests/tags.yml @@ -0,0 +1,2 @@ +bedtools/merge: + - "modules/nf-core/bedtools/merge/**" diff --git a/modules/nf-core/bedtools/sort/environment.yml b/modules/nf-core/bedtools/sort/environment.yml new file mode 100644 index 00000000..87b2e425 --- /dev/null +++ b/modules/nf-core/bedtools/sort/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_sort +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/sort/main.nf b/modules/nf-core/bedtools/sort/main.nf new file mode 100644 index 00000000..b833150a --- /dev/null +++ b/modules/nf-core/bedtools/sort/main.nf @@ -0,0 +1,54 @@ +process BEDTOOLS_SORT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + tuple val(meta), path(intervals) + path genome_file + + output: + tuple val(meta), path("*.${extension}"), emit: sorted + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def genome_cmd = genome_file ? 
"-g $genome_file" : "" + extension = task.ext.suffix ?: intervals.extension + if ("$intervals" == "${prefix}.${extension}") { + error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + } + """ + bedtools \\ + sort \\ + -i $intervals \\ + $genome_cmd \\ + $args \\ + > ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + extension = task.ext.suffix ?: intervals.extension + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/sort/meta.yml b/modules/nf-core/bedtools/sort/meta.yml new file mode 100644 index 00000000..7c915f5f --- /dev/null +++ b/modules/nf-core/bedtools/sort/meta.yml @@ -0,0 +1,54 @@ +name: bedtools_sort +description: Sorts a feature file by chromosome and other criteria. +keywords: + - bed + - sort + - bedtools + - chromosome +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/sort.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intervals: + type: file + description: BED/BEDGRAPH + pattern: "*.{bed|bedGraph}" + - genome_file: + type: file + description: | + Optional reference genome 2 column file that defines the expected chromosome order. + pattern: "*.{fai,txt,chromsizes}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - sorted: + type: file + description: Sorted output file + pattern: "*.${extension}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" + - "@chris-cheshire" + - "@adamrtalbot" +maintainers: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" + - "@chris-cheshire" + - "@adamrtalbot" diff --git a/modules/nf-core/bedtools/sort/tests/main.nf.test b/modules/nf-core/bedtools/sort/tests/main.nf.test new file mode 100644 index 00000000..b1f36dd9 --- /dev/null +++ b/modules/nf-core/bedtools/sort/tests/main.nf.test @@ -0,0 +1,58 @@ +nextflow_process { + + name "Test Process BEDTOOLS_SORT" + script "../main.nf" + config "./nextflow.config" + process "BEDTOOLS_SORT" + + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/sort" + + test("test_bedtools_sort") { + + when { + process { + """ + input[0] = [ [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + + test("test_bedtools_sort_with_genome") { + + when { + process { + """ + input[0] = [ [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/sort/tests/main.nf.test.snap b/modules/nf-core/bedtools/sort/tests/main.nf.test.snap new file mode 100644 index 00000000..f10e8b98 --- /dev/null +++ b/modules/nf-core/bedtools/sort/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + 
"test_bedtools_sort_with_genome": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_out.testtext:md5,fe4053cf4de3aebbdfc3be2efb125a74" + ] + ], + "1": [ + "versions.yml:md5,cdbae2c7ebc41e534aaf0835779061f8" + ], + "sorted": [ + [ + { + "id": "test" + }, + "test_out.testtext:md5,fe4053cf4de3aebbdfc3be2efb125a74" + ] + ], + "versions": [ + "versions.yml:md5,cdbae2c7ebc41e534aaf0835779061f8" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-19T10:13:11.830452" + }, + "test_bedtools_sort": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_out.testtext:md5,fe4053cf4de3aebbdfc3be2efb125a74" + ] + ], + "1": [ + "versions.yml:md5,cdbae2c7ebc41e534aaf0835779061f8" + ], + "sorted": [ + [ + { + "id": "test" + }, + "test_out.testtext:md5,fe4053cf4de3aebbdfc3be2efb125a74" + ] + ], + "versions": [ + "versions.yml:md5,cdbae2c7ebc41e534aaf0835779061f8" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-19T10:16:40.535947" + } +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/sort/tests/nextflow.config b/modules/nf-core/bedtools/sort/tests/nextflow.config new file mode 100644 index 00000000..f203c99c --- /dev/null +++ b/modules/nf-core/bedtools/sort/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + + withName: BEDTOOLS_SORT { + ext.prefix = { "${meta.id}_out" } + ext.suffix = "testtext" + } + +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/sort/tests/tags.yml b/modules/nf-core/bedtools/sort/tests/tags.yml new file mode 100644 index 00000000..47c85eea --- /dev/null +++ b/modules/nf-core/bedtools/sort/tests/tags.yml @@ -0,0 +1,2 @@ +bedtools/sort: + - "modules/nf-core/bedtools/sort/**" diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml new file mode 100644 index 00000000..8c69b121 --- /dev/null +++ b/modules/nf-core/cat/fastq/environment.yml @@ -0,0 +1,7 @@ +name: 
cat_fastq +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::coreutils=8.30 diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf new file mode 100644 index 00000000..f132b2ad --- /dev/null +++ b/modules/nf-core/cat/fastq/main.nf @@ -0,0 +1,79 @@ +process CAT_FASTQ { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(reads, stageAs: "input*/*") + + output: + tuple val(meta), path("*.merged.fastq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size >= 1) { + """ + cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size >= 2) { + def read1 = [] + def read2 = [] + readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v } + """ + cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz + cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? 
reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size > 1) { + """ + touch ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size > 2) { + """ + touch ${prefix}_1.merged.fastq.gz + touch ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } +} diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml new file mode 100644 index 00000000..db4ac3c7 --- /dev/null +++ b/modules/nf-core/cat/fastq/meta.yml @@ -0,0 +1,42 @@ +name: cat_fastq +description: Concatenates fastq files +keywords: + - cat + - fastq + - concatenate +tools: + - cat: + description: | + The cat utility reads files sequentially, writing them to the standard output. + documentation: https://www.gnu.org/software/coreutils/manual/html_node/cat-invocation.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files to be concatenated. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: Merged fastq file + pattern: "*.{merged.fastq.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test new file mode 100644 index 00000000..a71dcb8d --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -0,0 +1,140 @@ +// NOTE The version snaps may not be consistant +// https://github.com/nf-core/modules/pull/4087#issuecomment-1767948035 +nextflow_process { + + name "Test Process CAT_FASTQ" + script "../main.nf" + process "CAT_FASTQ" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/fastq" + + test("test_cat_fastq_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_same_name") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end_same_name") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_single_file") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git 
a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap new file mode 100644 index 00000000..43dfe28f --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -0,0 +1,169 @@ +{ + "test_cat_fastq_single_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:30:39.816981" + }, + "test_cat_fastq_single_end_same_name": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:32:35.229332" + }, + "test_cat_fastq_single_end_single_file": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:34:00.058829" + }, + "test_cat_fastq_paired_end_same_name": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + 
"test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:33:33.031555" + }, + "test_cat_fastq_paired_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:32:02.270935" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/fastq/tests/tags.yml b/modules/nf-core/cat/fastq/tests/tags.yml new file mode 100644 index 00000000..6ac43614 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/tags.yml @@ -0,0 +1,2 @@ +cat/fastq: + - modules/nf-core/cat/fastq/** diff --git a/modules/nf-core/ensemblvep/download/environment.yml b/modules/nf-core/ensemblvep/download/environment.yml new file mode 100644 index 00000000..2ea8642f --- /dev/null +++ b/modules/nf-core/ensemblvep/download/environment.yml @@ -0,0 +1,7 @@ +name: ensemblvep_download +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ensembl-vep=111.0 diff --git a/modules/nf-core/ensemblvep/download/main.nf b/modules/nf-core/ensemblvep/download/main.nf new 
file mode 100644 index 00000000..f9e025a5 --- /dev/null +++ b/modules/nf-core/ensemblvep/download/main.nf @@ -0,0 +1,47 @@ +process ENSEMBLVEP_DOWNLOAD { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ensembl-vep:111.0--pl5321h2a3209d_0' : + 'biocontainers/ensembl-vep:111.0--pl5321h2a3209d_0' }" + + input: + tuple val(meta), val(assembly), val(species), val(cache_version) + + output: + tuple val(meta), path(prefix), emit: cache + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: 'vep_cache' + """ + vep_install \\ + --CACHEDIR $prefix \\ + --SPECIES $species \\ + --ASSEMBLY $assembly \\ + --CACHE_VERSION $cache_version \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: 'vep_cache' + """ + mkdir $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ensemblvep/download/meta.yml b/modules/nf-core/ensemblvep/download/meta.yml new file mode 100644 index 00000000..a4277ad7 --- /dev/null +++ b/modules/nf-core/ensemblvep/download/meta.yml @@ -0,0 +1,45 @@ +name: ensemblvep_download +description: Ensembl Variant Effect Predictor (VEP). The cache downloading options are controlled through `task.ext.args`. 
+keywords: + - annotation + - cache + - download +tools: + - ensemblvep: + description: | + VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs + or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions. + homepage: https://www.ensembl.org/info/docs/tools/vep/index.html + documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - assembly: + type: string + description: | + Genome assembly + - species: + type: string + description: | + Species + - cache_version: + type: string + description: | + cache version +output: + - cache: + type: file + description: cache + pattern: "*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/ensemblvep/download/tests/main.nf.test b/modules/nf-core/ensemblvep/download/tests/main.nf.test new file mode 100644 index 00000000..760c9d56 --- /dev/null +++ b/modules/nf-core/ensemblvep/download/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process ENSEMBLVEP_DOWNLOAD" + script "../main.nf" + process "ENSEMBLVEP_DOWNLOAD" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "ensemblvep" + tag "ensemblvep/download" + + test("celegans - download") { + + when { + process { + """ + input[0] = Channel.of([ + [id:"111_WBcel235"], + params.vep_genome, + params.vep_species, + params.vep_cache_version + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("celegans - download - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [id:"111_WBcel235"], + params.vep_genome, + params.vep_species, + 
params.vep_cache_version + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/ensemblvep/download/tests/main.nf.test.snap b/modules/nf-core/ensemblvep/download/tests/main.nf.test.snap new file mode 100644 index 00000000..6ea596fb --- /dev/null +++ b/modules/nf-core/ensemblvep/download/tests/main.nf.test.snap @@ -0,0 +1,322 @@ +{ + "celegans - download": { + "content": [ + { + "0": [ + [ + { + "id": "111_WBcel235" + }, + [ + [ + [ + [ + "1-1000000.gz:md5,4da54db2f781d08975630811fd831585", + "10000001-11000000.gz:md5,7bee73e51d08f5e6de1796614105c5de", + "1000001-2000000.gz:md5,e8beff9020e261d78988c37e804cc89a", + "11000001-12000000.gz:md5,1a31b2dcf58822e837cd91b7a74a8b4f", + "12000001-13000000.gz:md5,34522be2ee5bd3cf51a9b151c877fe35", + "13000001-14000000.gz:md5,4e5a7b604f8aea2ad9de79b75ed89a6b", + "14000001-15000000.gz:md5,65146be110ea58b64ab8518bcbfbde9d", + "15000001-16000000.gz:md5,a39fdb7b0b056c0254574353351722eb", + "2000001-3000000.gz:md5,b72a04531477615dc4d2c530d09e60df", + "3000001-4000000.gz:md5,50dae46f370e1468c8f8f2c97cc75f0b", + "4000001-5000000.gz:md5,e58e124073689212e5311fbd8ccce415", + "5000001-6000000.gz:md5,db27434dc8be9557f97aa09a95126836", + "6000001-7000000.gz:md5,a5131e3ee41b329eb55fb3849ddb1471", + "7000001-8000000.gz:md5,61e1fbe1a82892a0f9f6ee0380fa60bc", + "8000001-9000000.gz:md5,48166dd4518ec21f597b6acca73809bb", + "9000001-10000000.gz:md5,3e416c856f40f36ec0ed3f42a93b2267" + ], + [ + "1-1000000.gz:md5,06b83c3bd2c651c5a8a96f0865d54a53", + "10000001-11000000.gz:md5,79b3348f860370d1697e6d4de128fca3", + "1000001-2000000.gz:md5,f98e31f3e25c0a419ebeee5b17527b00", + "11000001-12000000.gz:md5,5f23214fdef1f7637f6046dc751155ed", + "12000001-13000000.gz:md5,9a4422905679e543a92d47142b1acba1", + "13000001-14000000.gz:md5,c5db99e7d56f2115f8da8fe3af83314a", + "14000001-15000000.gz:md5,66b65af3732c0495dc74f3071203ac2f", + 
"15000001-16000000.gz:md5,d4c30dc42925cc92dc594d4145544f33", + "2000001-3000000.gz:md5,ff9b3fd1235468c738e9201e2fa98e08", + "3000001-4000000.gz:md5,c649308c7d3b4891a8c6f95e583f3a08", + "4000001-5000000.gz:md5,c53d2fa6695248d0a725ef70325aae91", + "5000001-6000000.gz:md5,5481fb4b60ebd97256f5d52c42aee0bd", + "6000001-7000000.gz:md5,94b82e096bacb091e0ef55bcd08b8ab8", + "7000001-8000000.gz:md5,83f571dfaf8b891bf27208466e6f7d8c", + "8000001-9000000.gz:md5,4f07e30c7d772544bc6e99bac371b97b", + "9000001-10000000.gz:md5,f1439765f6428ae99516d95dc5df3926" + ], + [ + "1-1000000.gz:md5,dab3bbb82e7ecc5430319b7723b88436", + "10000001-11000000.gz:md5,a1af0b4ce9c2ff301ac0a935a4189c58", + "1000001-2000000.gz:md5,8a70e4d08b14a4cf19b03a0556a6cae8", + "11000001-12000000.gz:md5,e866bb880cd79e612dc2081635368017", + "12000001-13000000.gz:md5,1b5be2ca310afd8289561331d19ddf07", + "13000001-14000000.gz:md5,907d2bb3f6b9b75fca9e40f1220c0cb4", + "2000001-3000000.gz:md5,783bcc5957ca4853853c5cda0418dbcd", + "3000001-4000000.gz:md5,cb2df81caa4a20215bb793ca2a792e01", + "4000001-5000000.gz:md5,2046030a187c0a86c9be02531aea0ed7", + "5000001-6000000.gz:md5,395a977401540eb90607b31ecc786a0f", + "6000001-7000000.gz:md5,e6a97128bc38649fcfa5dcb0032a570e", + "7000001-8000000.gz:md5,08804df16d4cdeb5a733d2d6b07b058e", + "8000001-9000000.gz:md5,bba084d260c12613403d144073105d9b", + "9000001-10000000.gz:md5,c0775c413018ed7964f3042112fe4e26" + ], + [ + "1-1000000.gz:md5,710a2c1ad83c3c9751a0f152cd98f02c", + "10000001-11000000.gz:md5,ac93a92e62043bddaa59973e26dac8e0", + "1000001-2000000.gz:md5,232ee5ab6c7295007ffc760f361e4c07", + "11000001-12000000.gz:md5,07e49ac7b78fbc29cd920b11a4d21afc", + "12000001-13000000.gz:md5,7849822cf3df022e9f273fb6a928aa8b", + "13000001-14000000.gz:md5,e3d3f0ee264694c72b3b989a542c5694", + "14000001-15000000.gz:md5,e5771b6c2fefe9f62b23c71ab873f94f", + "15000001-16000000.gz:md5,fa02114035f63a504c48005c498f8ec3", + "16000001-17000000.gz:md5,ef0fd03281afc2e636a64fd61df8c4dc", + 
"17000001-18000000.gz:md5,4483a6d74a07b1101dccee71a22213ed", + "2000001-3000000.gz:md5,f164cbfdc8bc12efd7c26df3935fe190", + "3000001-4000000.gz:md5,ff05a42801004994a4f30f081bc8945a", + "4000001-5000000.gz:md5,b63f51d54dc3cb4b214b54527dfa4234", + "5000001-6000000.gz:md5,6945f59a1fd50f1dfa8a2f1e55fecc12", + "6000001-7000000.gz:md5,3f27a3cb19ece2a9e87da6fe6476faaa", + "7000001-8000000.gz:md5,05bffc6e8af7a80fdd6cbc53e5512d0c", + "8000001-9000000.gz:md5,8a3594ee1eb15d7aa8edeb325e485dce", + "9000001-10000000.gz:md5,338bce56200690d450d5fcac65a91be7" + ], + [ + "1-1000000.gz:md5,779fda1352c0b1d635aa752c185e6ce2" + ], + [ + "1-1000000.gz:md5,267b3134411641d12bb6efcfd5e9d48d", + "10000001-11000000.gz:md5,4f7473abf622b57ca3c8d6de098759f7", + "1000001-2000000.gz:md5,5587d56269638b9cc88bdb7ae5dacc58", + "11000001-12000000.gz:md5,3da928f2caf09b9e3df58f9d1be2c541", + "12000001-13000000.gz:md5,eae3125082e1674f40fcfb8bb7da23e3", + "13000001-14000000.gz:md5,6a91eccbe0cf05737e2d2971d5618876", + "14000001-15000000.gz:md5,60c9b08ad4f674c0394a7c16dee2713e", + "15000001-16000000.gz:md5,b36c052923f3d3e3cc8db9c2852e1e16", + "16000001-17000000.gz:md5,0d0d7a8735aadda492912d032fd8733b", + "17000001-18000000.gz:md5,cf57648ef4cbc3325cff87e6b4b89653", + "18000001-19000000.gz:md5,5c649205050bfcbb4414db329659dadc", + "19000001-20000000.gz:md5,48838329ef3e8c26dd8b1ba82f925704", + "20000001-21000000.gz:md5,c7e1643d2880881fe5d44f718b8e6755", + "2000001-3000000.gz:md5,94849146caeca44e256aec58f1a914b8", + "3000001-4000000.gz:md5,4601fbb22fda2cbc4ed397707f8f7afd", + "4000001-5000000.gz:md5,7c8617d40b6d2e9a37802691f64f775e", + "5000001-6000000.gz:md5,0e81ba81f807f8429351e46dd8385e0e", + "6000001-7000000.gz:md5,6e86fccda1dc539e291bd5768ffd0dad", + "7000001-8000000.gz:md5,f0bf0306012d738fc71f7a26d7af2d81", + "8000001-9000000.gz:md5,0f3da0d0c714760bc2c39bc6696b74d1", + "9000001-10000000.gz:md5,4e0e2ba92378f6e1f261d5e59d8e3d9f" + ], + [ + "1-1000000.gz:md5,83a0a200cb053b1f28e41fe62068d49a", + 
"10000001-11000000.gz:md5,2f84058256242378b7d14ef526ed42ea", + "1000001-2000000.gz:md5,c655f70a1d16eef55a5003cdb63434ab", + "11000001-12000000.gz:md5,6d5c34d7a61fa4764c546d1b46a5c90d", + "12000001-13000000.gz:md5,84fceee383bb28edb8d749c744a10932", + "13000001-14000000.gz:md5,aadd01464ca36c813a831f5c2016ba02", + "14000001-15000000.gz:md5,94a5325ca01192d5eea218b30f933ec4", + "15000001-16000000.gz:md5,de84954b08d570585a448d8831c12e6d", + "16000001-17000000.gz:md5,be4954afe2236d62226307f8c9f95820", + "17000001-18000000.gz:md5,8fdacfb47fc5728eb27b22bbb8c9b301", + "2000001-3000000.gz:md5,83552c17d88c3986c56c681b0b49bb97", + "3000001-4000000.gz:md5,deb3bc27c8d431d60fc89a6fe49bcbc6", + "4000001-5000000.gz:md5,98de5bbb694c73f7ffde16fb92069117", + "5000001-6000000.gz:md5,7c5a116261bf41309b18c22b0cba5f52", + "6000001-7000000.gz:md5,fb0d2dc71bd0c9263ff23825d8a4ef64", + "7000001-8000000.gz:md5,2375dcd7787e7ca5d26442cea0ff6710", + "8000001-9000000.gz:md5,979f986c27b91a62873e639e3ebeae43", + "9000001-10000000.gz:md5,b80f6906a724e4b0d6c21dd4c77663fd" + ], + "chr_synonyms.txt:md5,8a6fce00cc7817ec727c49b7954f10bc", + "info.txt:md5,33ccb74a030a9a345051628c337cb8af" + ] + ] + ] + ] + ], + "1": [ + "versions.yml:md5,954fd177c394ba167d575a6aac47390b" + ], + "cache": [ + [ + { + "id": "111_WBcel235" + }, + [ + [ + [ + [ + "1-1000000.gz:md5,4da54db2f781d08975630811fd831585", + "10000001-11000000.gz:md5,7bee73e51d08f5e6de1796614105c5de", + "1000001-2000000.gz:md5,e8beff9020e261d78988c37e804cc89a", + "11000001-12000000.gz:md5,1a31b2dcf58822e837cd91b7a74a8b4f", + "12000001-13000000.gz:md5,34522be2ee5bd3cf51a9b151c877fe35", + "13000001-14000000.gz:md5,4e5a7b604f8aea2ad9de79b75ed89a6b", + "14000001-15000000.gz:md5,65146be110ea58b64ab8518bcbfbde9d", + "15000001-16000000.gz:md5,a39fdb7b0b056c0254574353351722eb", + "2000001-3000000.gz:md5,b72a04531477615dc4d2c530d09e60df", + "3000001-4000000.gz:md5,50dae46f370e1468c8f8f2c97cc75f0b", + "4000001-5000000.gz:md5,e58e124073689212e5311fbd8ccce415", + 
"5000001-6000000.gz:md5,db27434dc8be9557f97aa09a95126836", + "6000001-7000000.gz:md5,a5131e3ee41b329eb55fb3849ddb1471", + "7000001-8000000.gz:md5,61e1fbe1a82892a0f9f6ee0380fa60bc", + "8000001-9000000.gz:md5,48166dd4518ec21f597b6acca73809bb", + "9000001-10000000.gz:md5,3e416c856f40f36ec0ed3f42a93b2267" + ], + [ + "1-1000000.gz:md5,06b83c3bd2c651c5a8a96f0865d54a53", + "10000001-11000000.gz:md5,79b3348f860370d1697e6d4de128fca3", + "1000001-2000000.gz:md5,f98e31f3e25c0a419ebeee5b17527b00", + "11000001-12000000.gz:md5,5f23214fdef1f7637f6046dc751155ed", + "12000001-13000000.gz:md5,9a4422905679e543a92d47142b1acba1", + "13000001-14000000.gz:md5,c5db99e7d56f2115f8da8fe3af83314a", + "14000001-15000000.gz:md5,66b65af3732c0495dc74f3071203ac2f", + "15000001-16000000.gz:md5,d4c30dc42925cc92dc594d4145544f33", + "2000001-3000000.gz:md5,ff9b3fd1235468c738e9201e2fa98e08", + "3000001-4000000.gz:md5,c649308c7d3b4891a8c6f95e583f3a08", + "4000001-5000000.gz:md5,c53d2fa6695248d0a725ef70325aae91", + "5000001-6000000.gz:md5,5481fb4b60ebd97256f5d52c42aee0bd", + "6000001-7000000.gz:md5,94b82e096bacb091e0ef55bcd08b8ab8", + "7000001-8000000.gz:md5,83f571dfaf8b891bf27208466e6f7d8c", + "8000001-9000000.gz:md5,4f07e30c7d772544bc6e99bac371b97b", + "9000001-10000000.gz:md5,f1439765f6428ae99516d95dc5df3926" + ], + [ + "1-1000000.gz:md5,dab3bbb82e7ecc5430319b7723b88436", + "10000001-11000000.gz:md5,a1af0b4ce9c2ff301ac0a935a4189c58", + "1000001-2000000.gz:md5,8a70e4d08b14a4cf19b03a0556a6cae8", + "11000001-12000000.gz:md5,e866bb880cd79e612dc2081635368017", + "12000001-13000000.gz:md5,1b5be2ca310afd8289561331d19ddf07", + "13000001-14000000.gz:md5,907d2bb3f6b9b75fca9e40f1220c0cb4", + "2000001-3000000.gz:md5,783bcc5957ca4853853c5cda0418dbcd", + "3000001-4000000.gz:md5,cb2df81caa4a20215bb793ca2a792e01", + "4000001-5000000.gz:md5,2046030a187c0a86c9be02531aea0ed7", + "5000001-6000000.gz:md5,395a977401540eb90607b31ecc786a0f", + "6000001-7000000.gz:md5,e6a97128bc38649fcfa5dcb0032a570e", + 
"7000001-8000000.gz:md5,08804df16d4cdeb5a733d2d6b07b058e", + "8000001-9000000.gz:md5,bba084d260c12613403d144073105d9b", + "9000001-10000000.gz:md5,c0775c413018ed7964f3042112fe4e26" + ], + [ + "1-1000000.gz:md5,710a2c1ad83c3c9751a0f152cd98f02c", + "10000001-11000000.gz:md5,ac93a92e62043bddaa59973e26dac8e0", + "1000001-2000000.gz:md5,232ee5ab6c7295007ffc760f361e4c07", + "11000001-12000000.gz:md5,07e49ac7b78fbc29cd920b11a4d21afc", + "12000001-13000000.gz:md5,7849822cf3df022e9f273fb6a928aa8b", + "13000001-14000000.gz:md5,e3d3f0ee264694c72b3b989a542c5694", + "14000001-15000000.gz:md5,e5771b6c2fefe9f62b23c71ab873f94f", + "15000001-16000000.gz:md5,fa02114035f63a504c48005c498f8ec3", + "16000001-17000000.gz:md5,ef0fd03281afc2e636a64fd61df8c4dc", + "17000001-18000000.gz:md5,4483a6d74a07b1101dccee71a22213ed", + "2000001-3000000.gz:md5,f164cbfdc8bc12efd7c26df3935fe190", + "3000001-4000000.gz:md5,ff05a42801004994a4f30f081bc8945a", + "4000001-5000000.gz:md5,b63f51d54dc3cb4b214b54527dfa4234", + "5000001-6000000.gz:md5,6945f59a1fd50f1dfa8a2f1e55fecc12", + "6000001-7000000.gz:md5,3f27a3cb19ece2a9e87da6fe6476faaa", + "7000001-8000000.gz:md5,05bffc6e8af7a80fdd6cbc53e5512d0c", + "8000001-9000000.gz:md5,8a3594ee1eb15d7aa8edeb325e485dce", + "9000001-10000000.gz:md5,338bce56200690d450d5fcac65a91be7" + ], + [ + "1-1000000.gz:md5,779fda1352c0b1d635aa752c185e6ce2" + ], + [ + "1-1000000.gz:md5,267b3134411641d12bb6efcfd5e9d48d", + "10000001-11000000.gz:md5,4f7473abf622b57ca3c8d6de098759f7", + "1000001-2000000.gz:md5,5587d56269638b9cc88bdb7ae5dacc58", + "11000001-12000000.gz:md5,3da928f2caf09b9e3df58f9d1be2c541", + "12000001-13000000.gz:md5,eae3125082e1674f40fcfb8bb7da23e3", + "13000001-14000000.gz:md5,6a91eccbe0cf05737e2d2971d5618876", + "14000001-15000000.gz:md5,60c9b08ad4f674c0394a7c16dee2713e", + "15000001-16000000.gz:md5,b36c052923f3d3e3cc8db9c2852e1e16", + "16000001-17000000.gz:md5,0d0d7a8735aadda492912d032fd8733b", + "17000001-18000000.gz:md5,cf57648ef4cbc3325cff87e6b4b89653", + 
"18000001-19000000.gz:md5,5c649205050bfcbb4414db329659dadc", + "19000001-20000000.gz:md5,48838329ef3e8c26dd8b1ba82f925704", + "20000001-21000000.gz:md5,c7e1643d2880881fe5d44f718b8e6755", + "2000001-3000000.gz:md5,94849146caeca44e256aec58f1a914b8", + "3000001-4000000.gz:md5,4601fbb22fda2cbc4ed397707f8f7afd", + "4000001-5000000.gz:md5,7c8617d40b6d2e9a37802691f64f775e", + "5000001-6000000.gz:md5,0e81ba81f807f8429351e46dd8385e0e", + "6000001-7000000.gz:md5,6e86fccda1dc539e291bd5768ffd0dad", + "7000001-8000000.gz:md5,f0bf0306012d738fc71f7a26d7af2d81", + "8000001-9000000.gz:md5,0f3da0d0c714760bc2c39bc6696b74d1", + "9000001-10000000.gz:md5,4e0e2ba92378f6e1f261d5e59d8e3d9f" + ], + [ + "1-1000000.gz:md5,83a0a200cb053b1f28e41fe62068d49a", + "10000001-11000000.gz:md5,2f84058256242378b7d14ef526ed42ea", + "1000001-2000000.gz:md5,c655f70a1d16eef55a5003cdb63434ab", + "11000001-12000000.gz:md5,6d5c34d7a61fa4764c546d1b46a5c90d", + "12000001-13000000.gz:md5,84fceee383bb28edb8d749c744a10932", + "13000001-14000000.gz:md5,aadd01464ca36c813a831f5c2016ba02", + "14000001-15000000.gz:md5,94a5325ca01192d5eea218b30f933ec4", + "15000001-16000000.gz:md5,de84954b08d570585a448d8831c12e6d", + "16000001-17000000.gz:md5,be4954afe2236d62226307f8c9f95820", + "17000001-18000000.gz:md5,8fdacfb47fc5728eb27b22bbb8c9b301", + "2000001-3000000.gz:md5,83552c17d88c3986c56c681b0b49bb97", + "3000001-4000000.gz:md5,deb3bc27c8d431d60fc89a6fe49bcbc6", + "4000001-5000000.gz:md5,98de5bbb694c73f7ffde16fb92069117", + "5000001-6000000.gz:md5,7c5a116261bf41309b18c22b0cba5f52", + "6000001-7000000.gz:md5,fb0d2dc71bd0c9263ff23825d8a4ef64", + "7000001-8000000.gz:md5,2375dcd7787e7ca5d26442cea0ff6710", + "8000001-9000000.gz:md5,979f986c27b91a62873e639e3ebeae43", + "9000001-10000000.gz:md5,b80f6906a724e4b0d6c21dd4c77663fd" + ], + "chr_synonyms.txt:md5,8a6fce00cc7817ec727c49b7954f10bc", + "info.txt:md5,33ccb74a030a9a345051628c337cb8af" + ] + ] + ] + ] + ], + "versions": [ + "versions.yml:md5,954fd177c394ba167d575a6aac47390b" + 
] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-04-15T17:20:01.921038" + }, + "celegans - download - stub": { + "content": [ + { + "0": [ + [ + { + "id": "111_WBcel235" + }, + [ + + ] + ] + ], + "1": [ + "versions.yml:md5,954fd177c394ba167d575a6aac47390b" + ], + "cache": [ + [ + { + "id": "111_WBcel235" + }, + [ + + ] + ] + ], + "versions": [ + "versions.yml:md5,954fd177c394ba167d575a6aac47390b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-04-15T18:09:54.909036" + } +} \ No newline at end of file diff --git a/modules/nf-core/ensemblvep/download/tests/nextflow.config b/modules/nf-core/ensemblvep/download/tests/nextflow.config new file mode 100644 index 00000000..882bce41 --- /dev/null +++ b/modules/nf-core/ensemblvep/download/tests/nextflow.config @@ -0,0 +1,12 @@ +params { + vep_cache_version = "111" + vep_genome = "WBcel235" + vep_species = "caenorhabditis_elegans" +} + +process { + withName: ENSEMBLVEP_DOWNLOAD { + ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE' + ext.prefix = { "${params.vep_cache_version}_${params.vep_genome}" } + } +} diff --git a/modules/nf-core/ensemblvep/download/tests/tags.yml b/modules/nf-core/ensemblvep/download/tests/tags.yml new file mode 100644 index 00000000..26671f3d --- /dev/null +++ b/modules/nf-core/ensemblvep/download/tests/tags.yml @@ -0,0 +1,2 @@ +ensemblvep/download: + - "modules/nf-core/ensemblvep/download/**" diff --git a/modules/nf-core/ensemblvep/vep/environment.yml b/modules/nf-core/ensemblvep/vep/environment.yml new file mode 100644 index 00000000..91457c05 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/environment.yml @@ -0,0 +1,7 @@ +name: ensemblvep_vep +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ensembl-vep=111.0 diff --git a/modules/nf-core/ensemblvep/vep/main.nf b/modules/nf-core/ensemblvep/vep/main.nf new file mode 100644 index 
00000000..e82471aa --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/main.nf @@ -0,0 +1,70 @@ +process ENSEMBLVEP_VEP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ensembl-vep:111.0--pl5321h2a3209d_0' : + 'biocontainers/ensembl-vep:111.0--pl5321h2a3209d_0' }" + + input: + tuple val(meta), path(vcf), path(custom_extra_files) + val genome + val species + val cache_version + path cache + tuple val(meta2), path(fasta) + path extra_files + + output: + tuple val(meta), path("*.vcf.gz") , optional:true, emit: vcf + tuple val(meta), path("*.tab.gz") , optional:true, emit: tab + tuple val(meta), path("*.json.gz") , optional:true, emit: json + path "*.summary.html" , optional:true, emit: report + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json")? 'json' : args.contains("--tab")? 'tab' : 'vcf' + def compress_cmd = args.contains("--compress_output") ? '' : '--compress_output bgzip' + def prefix = task.ext.prefix ?: "${meta.id}" + def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" + def reference = fasta ? 
"--fasta $fasta" : "" + """ + vep \\ + -i $vcf \\ + -o ${prefix}.${file_extension}.gz \\ + $args \\ + $compress_cmd \\ + $reference \\ + --assembly $genome \\ + --species $species \\ + --cache \\ + --cache_version $cache_version \\ + --dir_cache $dir_cache \\ + --fork $task.cpus + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + echo "" | gzip > ${prefix}.tab.gz + echo "" | gzip > ${prefix}.json.gz + touch ${prefix}.summary.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ensemblvep/vep/meta.yml b/modules/nf-core/ensemblvep/vep/meta.yml new file mode 100644 index 00000000..d8ff8d14 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/meta.yml @@ -0,0 +1,92 @@ +name: ensemblvep_vep +description: Ensembl Variant Effect Predictor (VEP). The output-file-format is controlled through `task.ext.args`. +keywords: + - annotation + - vcf + - json + - tab +tools: + - ensemblvep: + description: | + VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs + or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions. + homepage: https://www.ensembl.org/info/docs/tools/vep/index.html + documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - vcf: + type: file + description: | + vcf to annotate + - custom_extra_files: + type: file + description: | + extra sample-specific files to be used with the `--custom` flag to be configured with ext.args + (optional) + - genome: + type: string + description: | + which genome to annotate with + - species: + type: string + description: | + which species to annotate with + - cache_version: + type: integer + description: | + which version of the cache to annotate with + - cache: + type: file + description: | + path to VEP cache (optional) + - meta2: + type: map + description: | + Groovy Map containing fasta reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: | + reference FASTA file (optional) + pattern: "*.{fasta,fa}" + - extra_files: + type: file + description: | + path to file(s) needed for plugins (optional) +output: + - vcf: + type: file + description: | + annotated vcf (optional) + pattern: "*.ann.vcf.gz" + - tab: + type: file + description: | + tab file with annotated variants (optional) + pattern: "*.ann.tab.gz" + - json: + type: file + description: | + json file with annotated variants (optional) + pattern: "*.ann.json.gz" + - report: + type: file + description: VEP report file + pattern: "*.html" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" + - "@matthdsm" + - "@nvnieuwk" +maintainers: + - "@maxulysse" + - "@matthdsm" + - "@nvnieuwk" diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test b/modules/nf-core/ensemblvep/vep/tests/main.nf.test new file mode 100644 index 00000000..4aff84a3 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/main.nf.test @@ -0,0 +1,114 @@ +nextflow_process { + + name "Test Process ENSEMBLVEP_VEP" + script "../main.nf" + process "ENSEMBLVEP_VEP" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "ensemblvep" + tag "ensemblvep/vep" + tag 
"ensemblvep/download" + + test("test_ensemblvep_vep_fasta_vcf") { + config "./vcf.config" + + setup { + run("ENSEMBLVEP_DOWNLOAD") { + script "../../download/main.nf" + + process { + """ + input[0] = Channel.of([ + [id:"111_WBcel235"], + params.vep_genome, + params.vep_species, + params.vep_cache_version + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] + ]) + input[1] = params.vep_genome + input[2] = params.vep_species + input[3] = params.vep_cache_version + input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] } + input[5] = Channel.value([ + [id:"fasta"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[6] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2") } + ) + } + + } + + test("test_ensemblvep_vep_fasta_tab_gz") { + config "./tab.gz.config" + + setup { + run("ENSEMBLVEP_DOWNLOAD") { + script "../../download/main.nf" + + process { + """ + input[0] = Channel.of([ + [id:"111_WBcel235"], + params.vep_genome, + params.vep_species, + params.vep_cache_version + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] + ]) + input[1] = params.vep_genome + input[2] = params.vep_species + input[3] = params.vep_cache_version + input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] } + input[5] = Channel.value([ + [id:"fasta"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[6] = [] + """ + } + } + + then { + 
assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert path(process.out.tab.get(0).get(1)).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v111.0") } + ) + } + } +} diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap b/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap new file mode 100644 index 00000000..f937b299 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap @@ -0,0 +1,26 @@ +{ + "test_ensemblvep_vep_fasta_tab_gz": { + "content": [ + [ + "versions.yml:md5,bd2ba1b4741a7d0a224160b50859f4ba" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-04-15T17:35:20.694114" + }, + "test_ensemblvep_vep_fasta_vcf": { + "content": [ + [ + "versions.yml:md5,bd2ba1b4741a7d0a224160b50859f4ba" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-04-15T17:34:41.093843" + } +} \ No newline at end of file diff --git a/modules/nf-core/ensemblvep/vep/tests/nextflow.config b/modules/nf-core/ensemblvep/vep/tests/nextflow.config new file mode 100644 index 00000000..882bce41 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/nextflow.config @@ -0,0 +1,12 @@ +params { + vep_cache_version = "111" + vep_genome = "WBcel235" + vep_species = "caenorhabditis_elegans" +} + +process { + withName: ENSEMBLVEP_DOWNLOAD { + ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE' + ext.prefix = { "${params.vep_cache_version}_${params.vep_genome}" } + } +} diff --git a/modules/nf-core/ensemblvep/vep/tests/tab.gz.config b/modules/nf-core/ensemblvep/vep/tests/tab.gz.config new file mode 100644 index 00000000..40eb03e5 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/tab.gz.config @@ -0,0 +1,5 @@ +process { + withName: ENSEMBLVEP_VEP { + ext.args = '--tab --compress_output bgzip' + } +} diff --git a/modules/nf-core/ensemblvep/vep/tests/tags.yml 
b/modules/nf-core/ensemblvep/vep/tests/tags.yml new file mode 100644 index 00000000..4aa4aa45 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/tags.yml @@ -0,0 +1,2 @@ +ensemblvep/vep: + - "modules/nf-core/ensemblvep/vep/**" diff --git a/modules/nf-core/ensemblvep/vep/tests/vcf.config b/modules/nf-core/ensemblvep/vep/tests/vcf.config new file mode 100644 index 00000000..ad8955a3 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/vcf.config @@ -0,0 +1,5 @@ +process { + withName: ENSEMBLVEP_VEP { + ext.args = '--vcf' + } +} diff --git a/modules/nf-core/gatk4/applybqsr/environment.yml b/modules/nf-core/gatk4/applybqsr/environment.yml new file mode 100644 index 00000000..80c811e6 --- /dev/null +++ b/modules/nf-core/gatk4/applybqsr/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_applybqsr +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/applybqsr/main.nf b/modules/nf-core/gatk4/applybqsr/main.nf new file mode 100644 index 00000000..78db9d7f --- /dev/null +++ b/modules/nf-core/gatk4/applybqsr/main.nf @@ -0,0 +1,51 @@ +process GATK4_APPLYBQSR { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) + path fasta + path fai + path dict + + output: + tuple val(meta), path("*.bam") , emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def interval_command = intervals ? 
"--intervals $intervals" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + ApplyBQSR \\ + --input $input \\ + --output ${prefix}.${input.getExtension()} \\ + --reference $fasta \\ + --bqsr-recal-file $bqsr_table \\ + $interval_command \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/applybqsr/meta.yml b/modules/nf-core/gatk4/applybqsr/meta.yml new file mode 100644 index 00000000..ab9efea3 --- /dev/null +++ b/modules/nf-core/gatk4/applybqsr/meta.yml @@ -0,0 +1,74 @@ +name: gatk4_applybqsr +description: Apply base quality score recalibration (BQSR) to a bam file +keywords: + - bam + - base quality score recalibration + - bqsr + - cram + - gatk4 +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - bqsr_table: + type: file + description: Recalibration table from gatk4_baserecalibrator + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: Recalibrated BAM file + pattern: "*.{bam}" + - cram: + type: file + description: Recalibrated CRAM file + pattern: "*.{cram}" +authors: + - "@yocra3" + - "@FriederikeHanssen" +maintainers: + - "@yocra3" + - "@FriederikeHanssen" diff --git a/modules/nf-core/gatk4/applybqsr/tests/main.nf.test b/modules/nf-core/gatk4/applybqsr/tests/main.nf.test new file mode 100644 index 00000000..3d9c6204 --- /dev/null +++ b/modules/nf-core/gatk4/applybqsr/tests/main.nf.test @@ -0,0 +1,95 @@ +nextflow_process { + + name "Test Process GATK4_APPLYBQSR" + script "../main.nf" + process "GATK4_APPLYBQSR" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/applybqsr" + + test("sarscov2 - bam") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/gatk/test.baserecalibrator.table', checkIfExists: true), + [] + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam - intervals") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/gatk/test.baserecalibrator.table', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - cram") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/test.baserecalibrator.table', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert snapshot(file(process.out.cram[0][1]).name).match("test.cram") } + ) + } + } + +} diff --git a/modules/nf-core/gatk4/applybqsr/tests/main.nf.test.snap b/modules/nf-core/gatk4/applybqsr/tests/main.nf.test.snap new file mode 100644 index 00000000..a387039d --- /dev/null +++ b/modules/nf-core/gatk4/applybqsr/tests/main.nf.test.snap @@ -0,0 +1,102 @@ +{ + "sarscov2 - bam - intervals": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,096d269e17f4ae53f765013479240db8" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,bb2a060a0280c812fba3c74b1707b350" + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,096d269e17f4ae53f765013479240db8" + ] + ], + "cram": [ + + ], + "versions": [ + "versions.yml:md5,bb2a060a0280c812fba3c74b1707b350" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T16:21:48.144461" + }, + "sarscov2 - cram": { + "content": [ + [ + "versions.yml:md5,bb2a060a0280c812fba3c74b1707b350" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T16:22:09.308602" + }, + "test.cram": { + "content": [ + "test.cram" + ], + 
"meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-12-09T03:10:46.70859771" + }, + "sarscov2 - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,022271b9ce0a07579282a2a5c1186513" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,bb2a060a0280c812fba3c74b1707b350" + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,022271b9ce0a07579282a2a5c1186513" + ] + ], + "cram": [ + + ], + "versions": [ + "versions.yml:md5,bb2a060a0280c812fba3c74b1707b350" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T16:21:28.719225" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/applybqsr/tests/tags.yml b/modules/nf-core/gatk4/applybqsr/tests/tags.yml new file mode 100644 index 00000000..8da9292d --- /dev/null +++ b/modules/nf-core/gatk4/applybqsr/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/applybqsr: + - "modules/nf-core/gatk4/applybqsr/**" diff --git a/modules/nf-core/gatk4/baserecalibrator/environment.yml b/modules/nf-core/gatk4/baserecalibrator/environment.yml new file mode 100644 index 00000000..365e5c63 --- /dev/null +++ b/modules/nf-core/gatk4/baserecalibrator/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_baserecalibrator +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/baserecalibrator/main.nf b/modules/nf-core/gatk4/baserecalibrator/main.nf new file mode 100644 index 00000000..1a299862 --- /dev/null +++ b/modules/nf-core/gatk4/baserecalibrator/main.nf @@ -0,0 +1,63 @@ +process GATK4_BASERECALIBRATOR { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(input), path(input_index), path(intervals) + path fasta + path fai + path dict + path known_sites + path known_sites_tbi + + output: + tuple val(meta), path("*.table"), emit: table + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def interval_command = intervals ? "--intervals $intervals" : "" + def sites_command = known_sites.collect{"--known-sites $it"}.join(' ') + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + BaseRecalibrator \\ + --input $input \\ + --output ${prefix}.table \\ + --reference $fasta \\ + $interval_command \\ + $sites_command \\ + --tmp-dir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.table + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/baserecalibrator/meta.yml b/modules/nf-core/gatk4/baserecalibrator/meta.yml new file mode 100644 index 00000000..8252b8c2 --- /dev/null +++ b/modules/nf-core/gatk4/baserecalibrator/meta.yml @@ -0,0 +1,77 @@ +name: gatk4_baserecalibrator +description: Generate recalibration table for Base Quality Score Recalibration (BQSR) +keywords: + - base quality score recalibration + - table + - bqsr + - gatk4 + - sort +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - known_sites: + type: file + description: VCF files with known sites for indels / snps (optional) + pattern: "*.vcf.gz" + - known_sites_tbi: + type: file + description: Tabix index of the known_sites (optional) + pattern: "*.vcf.gz.tbi" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - table: + type: file + description: Recalibration table from BaseRecalibrator + pattern: "*.{table}" +authors: + - "@yocra3" + - "@FriederikeHanssen" + - "@maxulysse" +maintainers: + - "@yocra3" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/baserecalibrator/tests/main.nf.test b/modules/nf-core/gatk4/baserecalibrator/tests/main.nf.test new file mode 100644 index 00000000..fbd91bea --- /dev/null +++ b/modules/nf-core/gatk4/baserecalibrator/tests/main.nf.test @@ -0,0 +1,166 @@ +nextflow_process { + + name "Test Process GATK4_BASERECALIBRATOR" + script "../main.nf" + process "GATK4_BASERECALIBRATOR" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/baserecalibrator" + + test("sarscov2 - bam") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', 
checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) + input[4] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) + input[5] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam - intervals") { + when { + process { + """ + + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) + input[4] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) + input[5] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + + """ + } + } + + then { + 
assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam - multiple sites") { + when { + process { + """ + + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) + input[4] = [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ] + input[5] = [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [], + [], + [] + ] + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - cram ") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + [] + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) + input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true) + input[5] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/baserecalibrator/tests/main.nf.test.snap b/modules/nf-core/gatk4/baserecalibrator/tests/main.nf.test.snap new file mode 100644 index 00000000..8291304b --- /dev/null +++ b/modules/nf-core/gatk4/baserecalibrator/tests/main.nf.test.snap @@ -0,0 +1,167 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.table:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,4ff697a3a05bb4d30701e6750c246ed2" + ], + "table": [ + [ + { + "id": "test" + }, + "test.table:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,4ff697a3a05bb4d30701e6750c246ed2" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T16:16:00.04396" + }, + "sarscov2 - bam - intervals": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.table:md5,9ecb5f00a2229291705addc09c0ec231" + ] + ], + "1": [ + 
"versions.yml:md5,4ff697a3a05bb4d30701e6750c246ed2" + ], + "table": [ + [ + { + "id": "test" + }, + "test.table:md5,9ecb5f00a2229291705addc09c0ec231" + ] + ], + "versions": [ + "versions.yml:md5,4ff697a3a05bb4d30701e6750c246ed2" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T16:15:17.899391" + }, + "sarscov2 - bam - multiple sites": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.table:md5,e2e43abdc0c943c1a54dae816d0b9ea7" + ] + ], + "1": [ + "versions.yml:md5,4ff697a3a05bb4d30701e6750c246ed2" + ], + "table": [ + [ + { + "id": "test" + }, + "test.table:md5,e2e43abdc0c943c1a54dae816d0b9ea7" + ] + ], + "versions": [ + "versions.yml:md5,4ff697a3a05bb4d30701e6750c246ed2" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T16:15:47.770383" + }, + "homo_sapiens - cram ": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.table:md5,35d89a3811aa31711fc9815b6b80e6ec" + ] + ], + "1": [ + "versions.yml:md5,4ff697a3a05bb4d30701e6750c246ed2" + ], + "table": [ + [ + { + "id": "test" + }, + "test.table:md5,35d89a3811aa31711fc9815b6b80e6ec" + ] + ], + "versions": [ + "versions.yml:md5,4ff697a3a05bb4d30701e6750c246ed2" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T16:16:42.135898" + }, + "sarscov2 - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.table:md5,e2e43abdc0c943c1a54dae816d0b9ea7" + ] + ], + "1": [ + "versions.yml:md5,4ff697a3a05bb4d30701e6750c246ed2" + ], + "table": [ + [ + { + "id": "test" + }, + "test.table:md5,e2e43abdc0c943c1a54dae816d0b9ea7" + ] + ], + "versions": [ + "versions.yml:md5,4ff697a3a05bb4d30701e6750c246ed2" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-02-13T16:14:57.629443" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/baserecalibrator/tests/tags.yml 
b/modules/nf-core/gatk4/baserecalibrator/tests/tags.yml new file mode 100644 index 00000000..648b4626 --- /dev/null +++ b/modules/nf-core/gatk4/baserecalibrator/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/baserecalibrator: + - "modules/nf-core/gatk4/baserecalibrator/**" diff --git a/modules/nf-core/gatk4/bedtointervallist/environment.yml b/modules/nf-core/gatk4/bedtointervallist/environment.yml new file mode 100644 index 00000000..d6fbe2e7 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_bedtointervallist +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/bedtointervallist/main.nf b/modules/nf-core/gatk4/bedtointervallist/main.nf new file mode 100644 index 00000000..68863d67 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/main.nf @@ -0,0 +1,56 @@ +process GATK4_BEDTOINTERVALLIST { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(bed) + tuple val(meta2), path(dict) + + output: + tuple val(meta), path('*.interval_list'), emit: interval_list + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK BedToIntervalList] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + BedToIntervalList \\ + --INPUT $bed \\ + --OUTPUT ${prefix}.interval_list \\ + --SEQUENCE_DICTIONARY $dict \\ + --TMP_DIR . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.interval_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/bedtointervallist/meta.yml b/modules/nf-core/gatk4/bedtointervallist/meta.yml new file mode 100644 index 00000000..187da885 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/meta.yml @@ -0,0 +1,51 @@ +name: gatk4_bedtointervallist +description: Creates an interval list from a bed file and a reference dict +keywords: + - bed + - bedtointervallist + - gatk4 + - interval list +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - bed: + type: file + description: Input bed file + pattern: "*.bed" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - dict: + type: file + description: Sequence dictionary + pattern: "*.dict" +output: + - interval_list: + type: file + description: gatk interval list file + pattern: "*.interval_list" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@ramprasadn" +maintainers: + - "@kevinmenden" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test new file mode 100644 index 00000000..2289f73f --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process GATK4_BEDTOINTERVALLIST" + script "../main.nf" + process "GATK4_BEDTOINTERVALLIST" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/bedtointervallist" + + test("test_gatk4_bedtointervallist") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + [file(params.modules_testdata_base_path + + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] + ] + input[1] = [ [ id:'dict' ], // meta map + [file(params.modules_testdata_base_path + + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap new file mode 100644 index 00000000..48c322fd --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "test_gatk4_bedtointervallist": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.interval_list:md5,e51101c9357fb2d59fd30e370eefa39c" + ] + ], + "1": [ + "versions.yml:md5,29a18c36f27584eb5a5f2f5457088b3b" + ], + "interval_list": [ + [ + { + "id": "test" + 
}, + "test.interval_list:md5,e51101c9357fb2d59fd30e370eefa39c" + ] + ], + "versions": [ + "versions.yml:md5,29a18c36f27584eb5a5f2f5457088b3b" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-19T14:20:12.168775" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml b/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml new file mode 100644 index 00000000..b4d54f12 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/bedtointervallist: + - "modules/nf-core/gatk4/bedtointervallist/**" diff --git a/modules/nf-core/gatk4/combinegvcfs/environment.yml b/modules/nf-core/gatk4/combinegvcfs/environment.yml new file mode 100644 index 00000000..e4510f97 --- /dev/null +++ b/modules/nf-core/gatk4/combinegvcfs/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_combinegvcfs +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/combinegvcfs/main.nf b/modules/nf-core/gatk4/combinegvcfs/main.nf new file mode 100644 index 00000000..df8ade11 --- /dev/null +++ b/modules/nf-core/gatk4/combinegvcfs/main.nf @@ -0,0 +1,48 @@ +process GATK4_COMBINEGVCFS { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(vcf), path(vcf_idx) + path fasta + path fai + path dict + + output: + tuple val(meta), path("*.combined.g.vcf.gz"), emit: combined_gvcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_list = vcf.collect{"--variant $it"}.join(' ') + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK COMBINEGVCFS] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CombineGVCFs \\ + $input_list \\ + --output ${prefix}.combined.g.vcf.gz \\ + --reference ${fasta} \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/combinegvcfs/meta.yml b/modules/nf-core/gatk4/combinegvcfs/meta.yml new file mode 100644 index 00000000..f13ddde7 --- /dev/null +++ b/modules/nf-core/gatk4/combinegvcfs/meta.yml @@ -0,0 +1,61 @@ +name: gatk4_combinegvcfs +description: Combine per-sample gVCF files produced by HaplotypeCaller into a multi-sample gVCF file +keywords: + - gvcf + - gatk4 + - vcf + - combinegvcfs + - short variant discovery +tools: + - gatk4: + description: Genome Analysis Toolkit (GATK4). Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools with a primary focus on variant discovery and genotyping. Its powerful processing engine and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037593911-CombineGVCFs + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - vcf: + type: file + description: Compressed VCF files + pattern: "*.vcf.gz" + - vcf_idx: + type: file + description: VCF Index file + pattern: "*.vcf.gz.idx" + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: FASTA index file + pattern: "*.fasta.fai" + - dict: + type: file + description: FASTA dictionary file + pattern: "*.dict" +output: + - combined_gvcf: + type: file + description: Compressed Combined GVCF file + pattern: "*.combined.g.vcf.gz" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@sateeshperi" + - "@mjcipriano" + - "@hseabolt" + - "@maxulysse" +maintainers: + - "@sateeshperi" + - "@mjcipriano" + - "@hseabolt" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/combinegvcfs/tests/main.nf.test b/modules/nf-core/gatk4/combinegvcfs/tests/main.nf.test new file mode 100644 index 00000000..56f54d0f --- /dev/null +++ b/modules/nf-core/gatk4/combinegvcfs/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process GATK4_COMBINEGVCFS" + script "../main.nf" + process "GATK4_COMBINEGVCFS" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/combinegvcfs" + + test("test_gatk4_combinegvcfs") { + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.idx', checkIfExists: true) + ] + 
input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + // File has a timestamp in it and is in binary format, need to compare only a portion of the file (after header) + { assert snapshot(path(process.out.combined_gvcf[0][1]).linesGzip[102..-1], + process.out.versions + ).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/combinegvcfs/tests/main.nf.test.snap b/modules/nf-core/gatk4/combinegvcfs/tests/main.nf.test.snap new file mode 100644 index 00000000..1f04b8bd --- /dev/null +++ b/modules/nf-core/gatk4/combinegvcfs/tests/main.nf.test.snap @@ -0,0 +1,146 @@ +{ + "test_gatk4_combinegvcfs": { + "content": [ + [ + "chr22\t1\t.\tA\t\t.\t.\tEND=1951\tGT:DP:GQ:MIN_DP:PL\t./.:0:0:0:0,0,0", + "chr22\t1952\t.\tT\t\t.\t.\tEND=1953\tGT:DP:GQ:MIN_DP:PL\t./.:1:3:1:0,3,25", + "chr22\t1954\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:1:0:1:0,0,0", + "chr22\t1955\t.\tT\t\t.\t.\tEND=1956\tGT:DP:GQ:MIN_DP:PL\t./.:1:3:1:0,3,33", + "chr22\t1957\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,68", + "chr22\t1958\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:1:3:1:0,3,34", + "chr22\t1959\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:3:9:3:0,9,101", + "chr22\t1960\t.\tA\t\t.\t.\tEND=1961\tGT:DP:GQ:MIN_DP:PL\t./.:3:6:2:0,6,58", + "chr22\t1962\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:2:0:2:0,0,0", + "chr22\t1963\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:6:15:6:0,15,225", + "chr22\t1964\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:6:18:6:0,18,197", + "chr22\t1965\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:11:33:11:0,33,335", + 
"chr22\t1966\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:25:75:25:0,75,687", + "chr22\t1967\t.\tG\t\t.\t.\tEND=1981\tGT:DP:GQ:MIN_DP:PL\t./.:107:99:35:0,105,934", + "chr22\t1982\trs7287620\tA\tG,\t.\t.\tBaseQRankSum=0.00;DP=211;ExcessHet=3.01;MQRankSum=0.00;RAW_MQandDP=759600,211;ReadPosRankSum=1.09\tGT:AD:DP:GQ:PL:SB\t./.:155,30,0:185:99:184,0,3246,648,3335,3984:77,78,15,15", + "chr22\t1983\t.\tA\t\t.\t.\tEND=2102\tGT:DP:GQ:MIN_DP:PL\t./.:476:99:209:0,120,1800", + "chr22\t2103\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:206:93:206:0,93,1395", + "chr22\t2104\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:194:48:194:0,48,720", + "chr22\t2105\t.\tC\t\t.\t.\tEND=2106\tGT:DP:GQ:MIN_DP:PL\t./.:177:30:171:0,30,450", + "chr22\t2107\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:156:24:156:0,24,360", + "chr22\t2108\t.\tT\t\t.\t.\tEND=2110\tGT:DP:GQ:MIN_DP:PL\t./.:110:21:96:0,21,315", + "chr22\t2111\t.\tC\t\t.\t.\tEND=2113\tGT:DP:GQ:MIN_DP:PL\t./.:47:15:29:0,15,225", + "chr22\t2114\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:14:12:14:0,12,180", + "chr22\t2115\t.\tA\t\t.\t.\tEND=2117\tGT:DP:GQ:MIN_DP:PL\t./.:10:6:8:0,6,90", + "chr22\t2118\t.\tC\t\t.\t.\tEND=2122\tGT:DP:GQ:MIN_DP:PL\t./.:7:0:3:0,0,0", + "chr22\t2123\t.\tC\tG,\t.\t.\tBaseQRankSum=0.967;DP=4;ExcessHet=3.01;MQRankSum=0.00;RAW_MQandDP=14400,4;ReadPosRankSum=0.967\tGT:AD:DP:GQ:PL:SB\t./.:1,2,0:3:1:41,0,1,43,7,50:0,1,1,1", + "chr22\t2124\t.\tG\t\t.\t.\tEND=2716\tGT:DP:GQ:MIN_DP:PL\t./.:0:0:0:0,0,0", + "chr22\t2717\t.\tT\t\t.\t.\tEND=2721\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,49", + "chr22\t2722\t.\tG\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:10:30:10:0,30,247", + "chr22\t2723\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:12:36:12:0,36,297", + "chr22\t2724\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:18:54:18:0,54,445", + "chr22\t2725\t.\tA\t\t.\t.\tEND=2836\tGT:DP:GQ:MIN_DP:PL\t./.:250:99:36:0,108,890", + "chr22\t2837\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:116:72:116:0,72,1080", + 
"chr22\t2838\t.\tA\t\t.\t.\tEND=2907\tGT:DP:GQ:MIN_DP:PL\t./.:0:0:0:0,0,0", + "chr22\t2908\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:8:24:8:0,24,260", + "chr22\t2909\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:16:48:16:0,48,546", + "chr22\t2910\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:21:63:21:0,63,678", + "chr22\t2911\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:28:84:28:0,84,894", + "chr22\t2912\t.\tC\t\t.\t.\tEND=3119\tGT:DP:GQ:MIN_DP:PL\t./.:848:99:33:0,99,1061", + "chr22\t3120\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:190:54:190:0,54,810", + "chr22\t3121\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:178:39:178:0,39,585", + "chr22\t3122\t.\tC\t\t.\t.\tEND=3124\tGT:DP:GQ:MIN_DP:PL\t./.:147:15:137:0,15,225", + "chr22\t3125\t.\tG\t\t.\t.\tEND=3126\tGT:DP:GQ:MIN_DP:PL\t./.:115:9:110:0,9,135", + "chr22\t3127\t.\tT\t\t.\t.\tEND=3139\tGT:DP:GQ:MIN_DP:PL\t./.:5:6:3:0,6,90", + "chr22\t3140\t.\tA\tG,\t.\t.\tDP=2;ExcessHet=3.01;RAW_MQandDP=7200,2\tGT:AD:DP:GQ:PL:SB\t./.:0,2,0:2:6:49,6,0,49,6,49:0,0,1,1", + "chr22\t3141\t.\tG\t\t.\t.\tEND=3144\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,49", + "chr22\t3145\t.\tT\t\t.\t.\tEND=3190\tGT:DP:GQ:MIN_DP:PL\t./.:0:0:0:0,0,0", + "chr22\t3191\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,49", + "chr22\t3192\t.\tA\t\t.\t.\tEND=3196\tGT:DP:GQ:MIN_DP:PL\t./.:6:18:6:0,18,148", + "chr22\t3197\t.\tT\t\t.\t.\tEND=3200\tGT:DP:GQ:MIN_DP:PL\t./.:8:24:8:0,24,196", + "chr22\t3201\t.\tT\t\t.\t.\tEND=3207\tGT:DP:GQ:MIN_DP:PL\t./.:12:36:12:0,36,297", + "chr22\t3208\t.\tA\t\t.\t.\tEND=3209\tGT:DP:GQ:MIN_DP:PL\t./.:14:42:14:0,42,346", + "chr22\t3210\t.\tT\t\t.\t.\tEND=3217\tGT:DP:GQ:MIN_DP:PL\t./.:16:48:16:0,48,396", + "chr22\t3218\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:18:54:18:0,54,445", + "chr22\t3219\t.\tA\t\t.\t.\tEND=3265\tGT:DP:GQ:MIN_DP:PL\t./.:20:60:20:0,60,493", + "chr22\t3266\trs4008589\tT\tC,\t.\t.\tDP=20;ExcessHet=3.01;RAW_MQandDP=72000,20\tGT:AD:DP:GQ:PL:SB\t./.:0,20,0:20:60:495,60,0,495,60,495:0,0,10,10", + 
"chr22\t3267\t.\tT\t\t.\t.\tEND=3302\tGT:DP:GQ:MIN_DP:PL\t./.:20:60:20:0,60,493", + "chr22\t3303\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:20:54:20:0,54,810", + "chr22\t3304\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:20:42:20:0,42,630", + "chr22\t3305\t.\tT\t\t.\t.\tEND=3308\tGT:DP:GQ:MIN_DP:PL\t./.:20:36:20:0,36,540", + "chr22\t3309\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:20:24:20:0,24,360", + "chr22\t3310\t.\tA\t\t.\t.\tEND=3317\tGT:DP:GQ:MIN_DP:PL\t./.:16:18:12:0,18,270", + "chr22\t3318\t.\tG\t\t.\t.\tEND=3320\tGT:DP:GQ:MIN_DP:PL\t./.:8:0:6:0,0,0", + "chr22\t3321\t.\tT\t\t.\t.\tEND=3351\tGT:DP:GQ:MIN_DP:PL\t./.:1:3:1:0,3,24", + "chr22\t3352\t.\tT\t\t.\t.\tEND=3354\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,58", + "chr22\t3355\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:2:0:2:0,0,10", + "chr22\t3356\t.\tT\t\t.\t.\tEND=3365\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,47", + "chr22\t3366\t.\tT\t\t.\t.\tEND=3373\tGT:DP:GQ:MIN_DP:PL\t./.:3:9:3:0,9,83", + "chr22\t3374\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:4:12:4:0,12,135", + "chr22\t3375\t.\tG\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:4:0:4:0,0,19", + "chr22\t3376\t.\tA\t\t.\t.\tEND=3377\tGT:DP:GQ:MIN_DP:PL\t./.:4:12:4:0,12,124", + "chr22\t3378\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:4:0:4:0,0,80", + "chr22\t3379\t.\tG\t\t.\t.\tEND=3381\tGT:DP:GQ:MIN_DP:PL\t./.:4:12:4:0,12,122", + "chr22\t3382\t.\tC\t\t.\t.\tEND=3385\tGT:DP:GQ:MIN_DP:PL\t./.:5:15:5:0,15,146", + "chr22\t3386\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:5:0:5:0,0,112", + "chr22\t3387\t.\tA\t\t.\t.\tEND=3390\tGT:DP:GQ:MIN_DP:PL\t./.:7:21:7:0,21,193", + "chr22\t3391\t.\tA\t\t.\t.\tEND=3392\tGT:DP:GQ:MIN_DP:PL\t./.:9:27:9:0,27,284", + "chr22\t3393\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:3:9:0,3,237", + "chr22\t3394\t.\tG\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:27:9:0,27,260", + "chr22\t3395\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:2:9:0,2,225", + "chr22\t3396\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:0:9:0,0,28", + 
"chr22\t3397\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:3:9:0,3,228", + "chr22\t3398\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:27:9:0,27,286", + "chr22\t3399\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:3:9:0,3,237", + "chr22\t3400\t.\tG\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:4:9:0,4,223", + "chr22\t3401\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:27:9:0,27,285", + "chr22\t3402\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:3:9:0,3,206", + "chr22\t3403\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:4:9:0,4,238", + "chr22\t3404\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:9:27:9:0,27,305", + "chr22\t3405\t.\tG\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:12:2:12:0,2,295", + "chr22\t3406\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:19:0:19:0,0,259", + "chr22\t3407\t.\tT\t\t.\t.\tEND=3408\tGT:DP:GQ:MIN_DP:PL\t./.:22:42:22:0,42,577", + "chr22\t3409\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:22:43:22:0,43,605", + "chr22\t3410\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:22:66:22:0,66,642", + "chr22\t3411\t.\tT\t\t.\t.\tEND=3412\tGT:DP:GQ:MIN_DP:PL\t./.:23:46:23:0,46,629", + "chr22\t3413\t.\tT\tG,TG,\t.\t.\tBaseQRankSum=-1.318e+00;DP=17;ExcessHet=3.01;MQRankSum=0.00;RAW_MQandDP=61200,17;ReadPosRankSum=2.37\tGT:AD:DP:GQ:PL:SB\t./.:8,5,2,0:15:44:86,0,108,44,85,250,107,136,225,271:6,2,6,1", + "chr22\t3414\t.\tG\t\t.\t.\tEND=3417\tGT:DP:GQ:MIN_DP:PL\t./.:17:51:17:0,51,478", + "chr22\t3418\t.\tC\t\t.\t.\tEND=3420\tGT:DP:GQ:MIN_DP:PL\t./.:18:54:18:0,54,506", + "chr22\t3421\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:19:57:19:0,57,522", + "chr22\t3422\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:28:84:28:0,84,811", + "chr22\t3423\t.\tA\t\t.\t.\tEND=3631\tGT:DP:GQ:MIN_DP:PL\t./.:1099:99:51:0,120,1800", + "chr22\t3632\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:99:96:99:0,96,1440", + "chr22\t3633\t.\tT\t\t.\t.\tEND=3634\tGT:DP:GQ:MIN_DP:PL\t./.:90:51:85:0,51,765", + "chr22\t3635\t.\tG\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:74:45:74:0,45,675", + 
"chr22\t3636\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:69:39:69:0,39,585", + "chr22\t3637\t.\tA\t\t.\t.\tEND=3644\tGT:DP:GQ:MIN_DP:PL\t./.:44:3:17:0,3,45", + "chr22\t3645\t.\tC\t\t.\t.\tEND=4465\tGT:DP:GQ:MIN_DP:PL\t./.:0:0:0:0,0,0", + "chr22\t4466\t.\tA\t\t.\t.\tEND=4487\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,49", + "chr22\t4488\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:0:0:0:0,0,0", + "chr22\t4489\t.\tT\t\t.\t.\tEND=4493\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,49", + "chr22\t4494\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:0:0:0:0,0,0", + "chr22\t4495\t.\tA\t\t.\t.\tEND=4498\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,48", + "chr22\t4499\t.\tG\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:6:18:6:0,18,148", + "chr22\t4500\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:10:30:10:0,30,247", + "chr22\t4501\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:14:42:14:0,42,346", + "chr22\t4502\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:24:72:24:0,72,593", + "chr22\t4503\t.\tG\t\t.\t.\tEND=4507\tGT:DP:GQ:MIN_DP:PL\t./.:32:90:30:0,90,742", + "chr22\t4508\t.\tG\t\t.\t.\tEND=4575\tGT:DP:GQ:MIN_DP:PL\t./.:60:99:34:0,102,841", + "chr22\t4576\t.\tT\t\t.\t.\tEND=4578\tGT:DP:GQ:MIN_DP:PL\t./.:48:90:48:0,90,1350", + "chr22\t4579\t.\tA\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:48:84:48:0,84,1260", + "chr22\t4580\t.\tA\t\t.\t.\tEND=4582\tGT:DP:GQ:MIN_DP:PL\t./.:42:54:40:0,54,810", + "chr22\t4583\t.\tG\t\t.\t.\tEND=4584\tGT:DP:GQ:MIN_DP:PL\t./.:38:48:36:0,48,720", + "chr22\t4585\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:34:42:34:0,42,630", + "chr22\t4586\t.\tC\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:32:24:32:0,24,360", + "chr22\t4587\t.\tA\t\t.\t.\tEND=4597\tGT:DP:GQ:MIN_DP:PL\t./.:16:12:4:0,12,99", + "chr22\t4598\t.\tT\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,49", + "chr22\t4599\t.\tG\t\t.\t.\tEND=4604\tGT:DP:GQ:MIN_DP:PL\t./.:4:12:4:0,12,98", + "chr22\t4605\t.\tG\t\t.\t.\t.\tGT:DP:GQ:MIN_DP:PL\t./.:0:0:0:0,0,0", + "chr22\t4606\t.\tT\t\t.\t.\tEND=4607\tGT:DP:GQ:MIN_DP:PL\t./.:2:6:2:0,6,49", + 
"chr22\t4608\t.\tA\t\t.\t.\tEND=40001\tGT:DP:GQ:MIN_DP:PL\t./.:0:0:0:0,0,0" + ], + [ + "versions.yml:md5,d6331ea8e97541d1d439707fe3dd41b2" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T15:41:25.316932" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/combinegvcfs/tests/tags.yml b/modules/nf-core/gatk4/combinegvcfs/tests/tags.yml new file mode 100644 index 00000000..26885b46 --- /dev/null +++ b/modules/nf-core/gatk4/combinegvcfs/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/combinegvcfs: + - "modules/nf-core/gatk4/combinegvcfs/**" diff --git a/modules/nf-core/gatk4/createsequencedictionary/environment.yml b/modules/nf-core/gatk4/createsequencedictionary/environment.yml new file mode 100644 index 00000000..78822ad0 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_createsequencedictionary +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/createsequencedictionary/main.nf b/modules/nf-core/gatk4/createsequencedictionary/main.nf new file mode 100644 index 00000000..c7f1d75b --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/main.nf @@ -0,0 +1,52 @@ +process GATK4_CREATESEQUENCEDICTIONARY { + tag "$fasta" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path('*.dict') , emit: dict + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def avail_mem = 6144 + if (!task.memory) { + log.info '[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CreateSequenceDictionary \\ + --REFERENCE $fasta \\ + --URI $fasta \\ + --TMP_DIR . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta.baseName}.dict + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/gatk4/createsequencedictionary/meta.yml new file mode 100644 index 00000000..f9d70be0 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/meta.yml @@ -0,0 +1,42 @@ +name: gatk4_createsequencedictionary +description: Creates a sequence dictionary for a reference sequence +keywords: + - createsequencedictionary + - dictionary + - fasta + - gatk4 +tools: + - gatk: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Input fasta file + pattern: "*.{fasta,fa}" +output: + - dict: + type: file + description: gatk dictionary file + pattern: "*.{dict}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" + - "@ramprasadn" +maintainers: + - "@maxulysse" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test new file mode 100644 index 00000000..a8a9c6d2 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process GATK4_CREATESEQUENCEDICTIONARY" + script "../main.nf" + process "GATK4_CREATESEQUENCEDICTIONARY" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/createsequencedictionary" + + test("sarscov2 - fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git 
a/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap new file mode 100644 index 00000000..16735f95 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "sarscov2 - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.dict:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6" + ], + "dict": [ + [ + { + "id": "test" + }, + "genome.dict:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-05-16T10:16:16.34453" + }, + "sarscov2 - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.dict:md5,7362679f176e0f52add03c08f457f646" + ] + ], + "1": [ + "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6" + ], + "dict": [ + [ + { + "id": "test" + }, + "genome.dict:md5,7362679f176e0f52add03c08f457f646" + ] + ], + "versions": [ + "versions.yml:md5,e60dd34a71fc2029d81dc67ccb5d6be6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-05-16T13:58:25.822068" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml b/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml new file mode 100644 index 00000000..035c5e4c --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/createsequencedictionary: + - "modules/nf-core/gatk4/createsequencedictionary/**" diff --git a/modules/nf-core/gatk4/haplotypecaller/environment.yml b/modules/nf-core/gatk4/haplotypecaller/environment.yml new file mode 100644 index 00000000..d4e8d360 --- /dev/null +++ b/modules/nf-core/gatk4/haplotypecaller/environment.yml @@ -0,0 +1,7 @@ +name: 
gatk4_haplotypecaller +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/haplotypecaller/main.nf b/modules/nf-core/gatk4/haplotypecaller/main.nf new file mode 100644 index 00000000..3043ee07 --- /dev/null +++ b/modules/nf-core/gatk4/haplotypecaller/main.nf @@ -0,0 +1,76 @@ +process GATK4_HAPLOTYPECALLER { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(input), path(input_index), path(intervals), path(dragstr_model) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + tuple val(meta5), path(dbsnp) + tuple val(meta6), path(dbsnp_tbi) + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.tbi") , optional:true, emit: tbi + tuple val(meta), path("*.realigned.bam"), optional:true, emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : "" + def interval_command = intervals ? "--intervals $intervals" : "" + def dragstr_command = dragstr_model ? "--dragstr-params-path $dragstr_model" : "" + def bamout_command = args.contains("--bam-writer-type") ? "--bam-output ${prefix.replaceAll('.g\\s*$', '')}.realigned.bam" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + HaplotypeCaller \\ + --input $input \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + $dbsnp_command \\ + $interval_command \\ + $dragstr_command \\ + $bamout_command \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def bamout_command = args.contains("--bam-writer-type") ? "--bam-output ${prefix.replaceAll('.g\\s*$', '')}.realigned.bam" : "" + + def stub_realigned_bam = bamout_command ? "touch ${prefix.replaceAll('.g\\s*$', '')}.realigned.bam" : "" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + ${stub_realigned_bam} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/haplotypecaller/meta.yml b/modules/nf-core/gatk4/haplotypecaller/meta.yml new file mode 100644 index 00000000..703b99a0 --- /dev/null +++ b/modules/nf-core/gatk4/haplotypecaller/meta.yml @@ -0,0 +1,108 @@ +name: gatk4_haplotypecaller +description: Call germline SNPs and indels via local re-assembly of haplotypes +keywords: + - gatk4 + - haplotype + - haplotypecaller +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - dragstr_model: + type: file + description: Text file containing the DragSTR model of the used BAM/CRAM file (optional) + pattern: "*.txt" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_reference' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_reference' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "fasta.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_reference' ] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - meta5: + type: map + description: | + Groovy Map containing dbsnp information + e.g. [ id:'test_dbsnp' ] + - dbsnp: + type: file + description: VCF file containing known sites (optional) + - meta6: + type: map + description: | + Groovy Map containing dbsnp information + e.g. [ id:'test_dbsnp' ] + - dbsnp_tbi: + type: file + description: VCF index of dbsnp (optional) +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + - tbi: + type: file + description: Index of VCF file + pattern: "*.vcf.gz.tbi" + - bam: + type: file + description: Assembled haplotypes and locally realigned reads + pattern: "*.realigned.bam" +authors: + - "@suzannejin" + - "@FriederikeHanssen" +maintainers: + - "@suzannejin" + - "@FriederikeHanssen" diff --git a/modules/nf-core/gatk4/haplotypecaller/tests/main.nf.test b/modules/nf-core/gatk4/haplotypecaller/tests/main.nf.test new file mode 100644 index 00000000..a124bff5 --- /dev/null +++ b/modules/nf-core/gatk4/haplotypecaller/tests/main.nf.test @@ -0,0 +1,142 @@ +// nf-core modules test gatk4/haplotypecaller +nextflow_process { + + name "Test Process GATK4_HAPLOTYPECALLER" + script "../main.nf" + process "GATK4_HAPLOTYPECALLER" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/haplotypecaller" + + test("homo_sapiens - [bam, bai] - fasta - fai - dict") { + + when { + process { + """ + input[0] = [ + [ id:'test_bam' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [], + [] + ] + input[1] = [ [ id:'test_fa' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ] + input[2] = [ [ id:'test_fai' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) ] + input[3] = [ [ id:'test_dict' ], file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + // { assert snapshot(process.out).match() }, // Unstable hashes + { assert 
snapshot(file(process.out.vcf.get(0).get(1)).name).match("gatk_hc_vcf_bam_input") }, + { assert snapshot(file(process.out.tbi.get(0).get(1)).name).match("gatk_hc_vcf_tbi_bam_input") }, + ) + } + + } + + test("homo_sapiens - [cram, crai] - fasta - fai - dict") { + + when { + process { + """ + input[0] = [ + [ id:'test_cram' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [], + [] + ] + input[1] = [ [ id:'test_fa' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ] + input[2] = [ [ id:'test_fai' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) ] + input[3] = [ [ id:'test_dict' ], file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + // { assert snapshot(process.out).match() }, // Unstable hashes + { assert snapshot(file(process.out.vcf.get(0).get(1)).name).match("gatk_hc_vcf_cram_input") }, + { assert snapshot(file(process.out.tbi.get(0).get(1)).name).match("gatk_hc_vcf_tbi_cram_input") }, + ) + } + + } + + test("homo_sapiens - [cram, crai] - fasta - fai - dict - sites - sites_tbi") { + + when { + process { + """ + input[0] = [ + [ id:'test_cram_sites' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [], + [] + ] + input[1] = [ [ id:'test_fa' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ] + input[2] = [ [ id:'test_fai' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) ] + input[3] = [ [ 
id:'test_dict' ], file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) ] + input[4] = [ [ id:'test_sites' ], file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) ] + input[5] = [ [ id:'test_sites_tbi' ], file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + // { assert snapshot(process.out).match() }, // Unstable hashes + { assert snapshot(file(process.out.vcf.get(0).get(1)).name).match("gatk_hc_vcf_cram_input_with_sites") }, + { assert snapshot(file(process.out.tbi.get(0).get(1)).name).match("gatk_hc_vcf_tbi_cram_input_with_sites") }, + ) + } + + } + + test("homo_sapiens - [cram, crai, dragstr_model] - fasta - fai - dict - sites - sites_tbi") { + + when { + process { + """ + input[0] = [ + [ id:'test_cram_sites_dragstr' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_dragstrmodel'], checkIfExists: true) + ] + input[1] = [ [ id:'test_fa' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ] + input[2] = [ [ id:'test_fai' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) ] + input[3] = [ [ id:'test_dict' ], file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) ] + input[4] = [ [ id:'test_sites' ], file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) ] + input[5] = [ [ id:'test_sites_tbi' ], file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + // { 
assert snapshot(process.out).match() }, // Unstable hashes + { assert snapshot(file(process.out.vcf.get(0).get(1)).name).match("gatk_hc_vcf_cram_dragstr_input_with_sites") }, + { assert snapshot(file(process.out.tbi.get(0).get(1)).name).match("gatk_hc_vcf_tbi_cram_dragstr_input_with_sites") }, + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/haplotypecaller/tests/main.nf.test.snap b/modules/nf-core/gatk4/haplotypecaller/tests/main.nf.test.snap new file mode 100644 index 00000000..375025ee --- /dev/null +++ b/modules/nf-core/gatk4/haplotypecaller/tests/main.nf.test.snap @@ -0,0 +1,82 @@ +{ + "gatk_hc_vcf_cram_dragstr_input_with_sites": { + "content": [ + "test_cram_sites_dragstr.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-20T13:24:45.142682" + }, + "gatk_hc_vcf_bam_input": { + "content": [ + "test_bam.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-20T13:23:19.203837" + }, + "gatk_hc_vcf_cram_input": { + "content": [ + "test_cram.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-20T13:23:48.434615" + }, + "gatk_hc_vcf_cram_input_with_sites": { + "content": [ + "test_cram_sites.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-20T13:24:17.147745" + }, + "gatk_hc_vcf_tbi_bam_input": { + "content": [ + "test_bam.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-20T13:23:19.23048" + }, + "gatk_hc_vcf_tbi_cram_input": { + "content": [ + "test_cram.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-20T13:23:48.45958" + }, + "gatk_hc_vcf_tbi_cram_dragstr_input_with_sites": { + "content": [ + "test_cram_sites_dragstr.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-20T13:24:45.154818" + }, + 
"gatk_hc_vcf_tbi_cram_input_with_sites": { + "content": [ + "test_cram_sites.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-20T13:24:17.158138" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/haplotypecaller/tests/tags.yml b/modules/nf-core/gatk4/haplotypecaller/tests/tags.yml new file mode 100644 index 00000000..d05bb655 --- /dev/null +++ b/modules/nf-core/gatk4/haplotypecaller/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/haplotypecaller: + - "modules/nf-core/gatk4/haplotypecaller/**" diff --git a/modules/nf-core/gatk4/indexfeaturefile/environment.yml b/modules/nf-core/gatk4/indexfeaturefile/environment.yml new file mode 100644 index 00000000..dccdb15e --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_indexfeaturefile +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/indexfeaturefile/main.nf b/modules/nf-core/gatk4/indexfeaturefile/main.nf new file mode 100644 index 00000000..b1a2698c --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/main.nf @@ -0,0 +1,41 @@ +process GATK4_INDEXFEATUREFILE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(feature_file) + + output: + tuple val(meta), path("*.{tbi,idx}"), emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK IndexFeatureFile] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + IndexFeatureFile \\ + --input $feature_file \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/indexfeaturefile/meta.yml b/modules/nf-core/gatk4/indexfeaturefile/meta.yml new file mode 100644 index 00000000..575c6038 --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/meta.yml @@ -0,0 +1,43 @@ +name: gatk4_indexfeaturefile +description: Creates an index for a feature file, e.g. VCF or BED file. +keywords: + - feature + - gatk4 + - index + - indexfeaturefile +tools: + - gatk4: + description: Genome Analysis Toolkit (GATK4) + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + tool_dev_url: https://github.com/broadinstitute/gatk + doi: "10.1158/1538-7445.AM2017-3590" + licence: ["BSD-3-clause"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - feature_file: + type: file + description: VCF/BED file + pattern: "*.{vcf,vcf.gz,bed,bed.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - index: + type: file + description: Index for VCF/BED file + pattern: "*.{tbi,idx}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@santiagorevale" +maintainers: + - "@santiagorevale" diff --git a/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test b/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test new file mode 100644 index 00000000..ee99a541 --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test @@ -0,0 +1,105 @@ +// nf-core modules test gatk4/indexfeaturefile +nextflow_process { + + name "Test Process GATK4_INDEXFEATUREFILE" + script "../main.nf" + process "GATK4_INDEXFEATUREFILE" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/indexfeaturefile" + + test("test_gatk4_indexfeaturefile_bed") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert snapshot(file(process.out.index.get(0).get(1)).name).match("geneome.bed.idx") }, + ) + } + + } + + test("test_gatk4_indexfeaturefile_bed_gz") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['genome']['genome_bed_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(file(process.out.index.get(0).get(1)).name).match("genome.bed.gz.tbi") }, + ) + } + + } + + test("test_gatk4_indexfeaturefile_vcf") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out.versions).match() }, + { assert snapshot(file(process.out.index.get(0).get(1)).name).match("test.genome.vcf.idx") }, + ) + } + + } + + test("test_gatk4_indexfeaturefile_vcf_gz") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(file(process.out.index.get(0).get(1)).name).match("test.genome.vcf.gz.tbi") }, + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test.snap b/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test.snap new file mode 100644 index 00000000..fc193336 --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/tests/main.nf.test.snap @@ -0,0 +1,132 @@ +{ + "genome.bed.gz.tbi": { + "content": [ + "genome.bed.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:57:03.068725" + }, + "test_gatk4_indexfeaturefile_vcf": { + "content": [ + [ + "versions.yml:md5,e01e4575236d930ace929eec9c4c80dd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:57:20.602472" + }, + "geneome.bed.idx": { + "content": [ + "genome.bed.idx" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:56:46.988441" + }, + "test.genome.vcf.gz.tbi": { + "content": [ + "test.genome.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:57:51.898472" + }, + "test_gatk4_indexfeaturefile_bed_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.bed.gz.tbi:md5,4bc51e2351a6e83f20e13be75861f941" + ] + ], + "1": [ + "versions.yml:md5,e01e4575236d930ace929eec9c4c80dd" + ], + "index": [ + [ + { + "id": "test" + }, + 
"genome.bed.gz.tbi:md5,4bc51e2351a6e83f20e13be75861f941" + ] + ], + "versions": [ + "versions.yml:md5,e01e4575236d930ace929eec9c4c80dd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:57:03.058351" + }, + "test_gatk4_indexfeaturefile_vcf_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.genome.vcf.gz.tbi:md5,fedd68eaddf8d31257853d9da8325bd3" + ] + ], + "1": [ + "versions.yml:md5,e01e4575236d930ace929eec9c4c80dd" + ], + "index": [ + [ + { + "id": "test" + }, + "test.genome.vcf.gz.tbi:md5,fedd68eaddf8d31257853d9da8325bd3" + ] + ], + "versions": [ + "versions.yml:md5,e01e4575236d930ace929eec9c4c80dd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:57:51.861697" + }, + "test.genome.vcf.idx": { + "content": [ + "test.genome.vcf.idx" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:57:20.624337" + }, + "test_gatk4_indexfeaturefile_bed": { + "content": [ + [ + "versions.yml:md5,e01e4575236d930ace929eec9c4c80dd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T18:56:46.885162" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/indexfeaturefile/tests/tags.yml b/modules/nf-core/gatk4/indexfeaturefile/tests/tags.yml new file mode 100644 index 00000000..041bd3d3 --- /dev/null +++ b/modules/nf-core/gatk4/indexfeaturefile/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/indexfeaturefile: + - "modules/nf-core/gatk4/indexfeaturefile/**" diff --git a/modules/nf-core/gatk4/intervallisttools/environment.yml b/modules/nf-core/gatk4/intervallisttools/environment.yml new file mode 100644 index 00000000..a4026f98 --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttools/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_intervallisttools +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git 
a/modules/nf-core/gatk4/intervallisttools/main.nf b/modules/nf-core/gatk4/intervallisttools/main.nf new file mode 100644 index 00000000..400fa038 --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttools/main.nf @@ -0,0 +1,74 @@ +process GATK4_INTERVALLISTTOOLS { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(intervals) + + output: + tuple val(meta), path("*_split/*/*.interval_list"), emit: interval_list + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK IntervalListTools] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + + mkdir ${prefix}_split + + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + IntervalListTools \\ + --INPUT $intervals \\ + --OUTPUT ${prefix}_split \\ + --TMP_DIR . 
\\ + $args + + python3 < versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p ${prefix}_split/temp_0001_of_6 + mkdir -p ${prefix}_split/temp_0002_of_6 + mkdir -p ${prefix}_split/temp_0003_of_6 + mkdir -p ${prefix}_split/temp_0004_of_6 + touch ${prefix}_split/temp_0001_of_6/1scattered.interval_list + touch ${prefix}_split/temp_0002_of_6/2scattered.interval_list + touch ${prefix}_split/temp_0003_of_6/3scattered.interval_list + touch ${prefix}_split/temp_0004_of_6/4scattered.interval_list + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/intervallisttools/meta.yml b/modules/nf-core/gatk4/intervallisttools/meta.yml new file mode 100644 index 00000000..748dccfc --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttools/meta.yml @@ -0,0 +1,45 @@ +name: gatk4_intervallisttools +description: Splits the interval list file into unique, equally-sized interval files and place it under a directory +keywords: + - bed + - gatk4 + - interval_list + - sort +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - interval_list: + type: file + description: Interval list file + pattern: "*.interval_list" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - interval_list: + type: file + description: Interval list files + pattern: "*.interval_list" +authors: + - "@praveenraj2018" +maintainers: + - "@praveenraj2018" diff --git a/modules/nf-core/gatk4/intervallisttools/tests/main.nf.test b/modules/nf-core/gatk4/intervallisttools/tests/main.nf.test new file mode 100644 index 00000000..2891bf9e --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttools/tests/main.nf.test @@ -0,0 +1,72 @@ +nextflow_process { + + name "Test Process GATK4_INTERVALLISTTOOLS" + script "../main.nf" + process "GATK4_INTERVALLISTTOOLS" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/bedtointervallist" + tag "gatk4/intervallisttools" + + setup { + run("GATK4_BEDTOINTERVALLIST") { + script "../../bedtointervallist/main.nf" + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true)] + ] + input[1] = [ + [ id:'dict' ], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true)] + ] + """ + } + } + } + + test("test_gatk4_intervallisttools") { + + when { + process { + """ + input[0] = GATK4_BEDTOINTERVALLIST.out.interval_list + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("test_gatk4_intervallisttools -stub") { + + options "-stub" + + when { + process { + """ + input[0] = GATK4_BEDTOINTERVALLIST.out.interval_list + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out).match() }, + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/intervallisttools/tests/main.nf.test.snap b/modules/nf-core/gatk4/intervallisttools/tests/main.nf.test.snap new file mode 100644 index 00000000..7718ed00 --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttools/tests/main.nf.test.snap @@ -0,0 +1,88 @@ +{ + "test_gatk4_intervallisttools": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "1scattered.interval_list:md5,64f6665f9fbd257e4a300ec602f4e995", + "2scattered.interval_list:md5,f515c3da0c6accfd8e7dc33df50855c5", + "3scattered.interval_list:md5,7a918e8c9211b54334587793e8cbae53", + "4scattered.interval_list:md5,1b93105227a7dc81f07101a1efd31498" + ] + ] + ], + "1": [ + "versions.yml:md5,ff682cc9ad70d65a80280df57b316b03" + ], + "interval_list": [ + [ + { + "id": "test" + }, + [ + "1scattered.interval_list:md5,64f6665f9fbd257e4a300ec602f4e995", + "2scattered.interval_list:md5,f515c3da0c6accfd8e7dc33df50855c5", + "3scattered.interval_list:md5,7a918e8c9211b54334587793e8cbae53", + "4scattered.interval_list:md5,1b93105227a7dc81f07101a1efd31498" + ] + ] + ], + "versions": [ + "versions.yml:md5,ff682cc9ad70d65a80280df57b316b03" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T21:26:22.252885" + }, + "test_gatk4_intervallisttools -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "1scattered.interval_list:md5,d41d8cd98f00b204e9800998ecf8427e", + "2scattered.interval_list:md5,d41d8cd98f00b204e9800998ecf8427e", + "3scattered.interval_list:md5,d41d8cd98f00b204e9800998ecf8427e", + "4scattered.interval_list:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,ff682cc9ad70d65a80280df57b316b03" + ], + "interval_list": [ + [ + { + "id": "test" + }, + [ + "1scattered.interval_list:md5,d41d8cd98f00b204e9800998ecf8427e", + "2scattered.interval_list:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"3scattered.interval_list:md5,d41d8cd98f00b204e9800998ecf8427e", + "4scattered.interval_list:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,ff682cc9ad70d65a80280df57b316b03" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T22:15:11.772344" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/intervallisttools/tests/nextflow.config b/modules/nf-core/gatk4/intervallisttools/tests/nextflow.config new file mode 100644 index 00000000..b24b20db --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttools/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GATK4_INTERVALLISTTOOLS { + ext.args = '--SCATTER_COUNT 6 --SUBDIVISION_MODE BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW --UNIQUE true --SORT true' + } +} diff --git a/modules/nf-core/gatk4/intervallisttools/tests/tags.yml b/modules/nf-core/gatk4/intervallisttools/tests/tags.yml new file mode 100644 index 00000000..bf85ff5f --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttools/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/intervallisttools: + - "modules/nf-core/gatk4/intervallisttools/**" diff --git a/modules/nf-core/gatk4/mergevcfs/environment.yml b/modules/nf-core/gatk4/mergevcfs/environment.yml new file mode 100644 index 00000000..efd9faa2 --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_mergevcfs +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/mergevcfs/main.nf b/modules/nf-core/gatk4/mergevcfs/main.nf new file mode 100644 index 00000000..9e8d4391 --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/main.nf @@ -0,0 +1,60 @@ +process GATK4_MERGEVCFS { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(vcf) + tuple val(meta2), path(dict) + + output: + tuple val(meta), path('*.vcf.gz'), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_list = vcf.collect{ "--INPUT $it"}.join(' ') + def reference_command = dict ? "--SEQUENCE_DICTIONARY $dict" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK MergeVcfs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + MergeVcfs \\ + $input_list \\ + --OUTPUT ${prefix}.vcf.gz \\ + $reference_command \\ + --TMP_DIR . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/mergevcfs/meta.yml b/modules/nf-core/gatk4/mergevcfs/meta.yml new file mode 100644 index 00000000..996053fc --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/meta.yml @@ -0,0 +1,52 @@ +name: gatk4_mergevcfs +description: Merges several vcf files +keywords: + - gatk4 + - merge + - vcf +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. 
Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - vcf: + type: list + description: Two or more VCF files + pattern: "*.{vcf,vcf.gz}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] + - dict: + type: file + description: Optional Sequence Dictionary as input + pattern: "*.dict" +output: + - vcf: + type: file + description: merged vcf file + pattern: "*.vcf.gz" + - tbi: + type: file + description: index files for the merged vcf files + pattern: "*.tbi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" +maintainers: + - "@kevinmenden" diff --git a/modules/nf-core/gatk4/mergevcfs/tests/main.nf.test b/modules/nf-core/gatk4/mergevcfs/tests/main.nf.test new file mode 100644 index 00000000..77ace10a --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_process { + + name "Test Process GATK4_MERGEVCFS" + script "../main.nf" + process "GATK4_MERGEVCFS" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/mergevcfs" + + test("test_gatk4_mergevcfs") { + when { + process { + """ + input[0] = [ [ id:'test' ], [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz', checkIfExists: true) ]] + input[1] = [ [], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true)] + """ + } + } + + then { + 
assertAll( + { assert process.success }, + { + assert snapshot( + file(process.out.vcf.get(0).get(1)).name, + file(process.out.tbi.get(0).get(1)).name + ).match("test_gatk4_mergevcfs") + }, + ) + } + + } + + test("test_gatk4_mergevcfs_no_dict") { + when { + process { + """ + input[0] = [ [ id:'test' ], [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz', checkIfExists: true) ]] + input[1] = [ [],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + file(process.out.vcf.get(0).get(1)).name, + file(process.out.tbi.get(0).get(1)).name + ).match("test_gatk4_mergevcfs_no_dict") + }, + ) + } + + } + + test("test_gatk4_mergevcfs_no_dict_stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test' ], [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz', checkIfExists: true) ]] + input[1] = [ [],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + file(process.out.vcf.get(0).get(1)).name, + file(process.out.tbi.get(0).get(1)).name + ).match("test_gatk4_mergevcfs_no_dict_stub") + }, + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/mergevcfs/tests/main.nf.test.snap b/modules/nf-core/gatk4/mergevcfs/tests/main.nf.test.snap new file mode 100644 index 00000000..62cceed5 --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "test_gatk4_mergevcfs_no_dict_stub": { + "content": [ + "test.vcf.gz", + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T14:57:40.784590995" + }, + "test_gatk4_mergevcfs": { + "content": [ + "test.vcf.gz", + 
"test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T14:56:42.178255913" + }, + "test_gatk4_mergevcfs_no_dict": { + "content": [ + "test.vcf.gz", + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T14:57:11.404322124" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/mergevcfs/tests/tags.yml b/modules/nf-core/gatk4/mergevcfs/tests/tags.yml new file mode 100644 index 00000000..d2a74ba2 --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/mergevcfs: + - "modules/nf-core/gatk4/mergevcfs/**" diff --git a/modules/nf-core/gatk4/splitncigarreads/environment.yml b/modules/nf-core/gatk4/splitncigarreads/environment.yml new file mode 100644 index 00000000..834d2486 --- /dev/null +++ b/modules/nf-core/gatk4/splitncigarreads/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_splitncigarreads +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/splitncigarreads/main.nf b/modules/nf-core/gatk4/splitncigarreads/main.nf new file mode 100644 index 00000000..e2c287a0 --- /dev/null +++ b/modules/nf-core/gatk4/splitncigarreads/main.nf @@ -0,0 +1,49 @@ +process GATK4_SPLITNCIGARREADS { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(bam), path(bai), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + + output: + tuple val(meta), path('*.bam'), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def interval_command = intervals ? "--intervals $intervals" : "" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK SplitNCigarReads] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + SplitNCigarReads \\ + --input $bam \\ + --output ${prefix}.bam \\ + --reference $fasta \\ + $interval_command \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/splitncigarreads/meta.yml b/modules/nf-core/gatk4/splitncigarreads/meta.yml new file mode 100644 index 00000000..f8132746 --- /dev/null +++ b/modules/nf-core/gatk4/splitncigarreads/meta.yml @@ -0,0 +1,73 @@ +name: gatk4_splitncigarreads +description: Splits reads that contain Ns in their cigar string +keywords: + - gatk4 + - merge + - vcf +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - bam: + type: list + description: BAM/SAM/CRAM file containing reads + pattern: "*.{bam,sam,cram}" + - bai: + type: list + description: BAI/SAI/CRAI index file (optional) + pattern: "*.{bai,sai,crai}" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'reference' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'reference' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'reference' ] + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" +output: + - bam: + type: file + description: Output file with split reads (BAM/SAM/CRAM) + pattern: "*.{bam,sam,cram}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" +maintainers: + - "@kevinmenden" diff --git a/modules/nf-core/gatk4/splitncigarreads/tests/main.nf.test b/modules/nf-core/gatk4/splitncigarreads/tests/main.nf.test new file mode 100644 index 00000000..b4910126 --- /dev/null +++ b/modules/nf-core/gatk4/splitncigarreads/tests/main.nf.test @@ -0,0 +1,80 @@ +nextflow_process { + + name "Test Process GATK4_SPLITNCIGARREADS" + script "../main.nf" + process "GATK4_SPLITNCIGARREADS" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/splitncigarreads" + + test("test_gatk4_splitncigarreads") { + + when { + process { + """ + input[0] = [ [ id:'test' ], file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists:true), []] + input[1] = [ [ id:'reference' ], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + input[2] = [ [ id:'reference' ], file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ] + input[3] = [ [ id:'reference' ], file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam.get(0).get(1)).name).match("test_gatk4_splitncigarreads") } + ) + } + + } + + test("test_gatk4_splitncigarreads_intervals") { + + when { + process { + """ + input[0] = [ [ id:'test' ], file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), 
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists:true), file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)] + input[1] = [ [ id:'reference' ], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + input[2] = [ [ id:'reference' ], file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ] + input[3] = [ [ id:'reference' ], file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam.get(0).get(1)).name).match("test_gatk4_splitncigarreads_intervals") } + ) + } + + } + + test("test_gatk4_splitncigarreads_stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test' ], file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), [], []] + input[1] = [ [ id:'reference' ], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + input[2] = [ [ id:'reference' ], file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ] + input[3] = [ [ id:'reference' ], file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam.get(0).get(1)).name).match("test_gatk4_splitncigarreads_stub") } + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/splitncigarreads/tests/main.nf.test.snap b/modules/nf-core/gatk4/splitncigarreads/tests/main.nf.test.snap new file mode 100644 index 00000000..3e581ceb --- /dev/null +++ b/modules/nf-core/gatk4/splitncigarreads/tests/main.nf.test.snap @@ -0,0 +1,32 @@ +{ + "test_gatk4_splitncigarreads_intervals": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T08:26:10.91960579" + }, + 
"test_gatk4_splitncigarreads_stub": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T08:26:32.322712467" + }, + "test_gatk4_splitncigarreads": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T08:25:53.532205456" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/splitncigarreads/tests/tags.yml b/modules/nf-core/gatk4/splitncigarreads/tests/tags.yml new file mode 100644 index 00000000..b5d95313 --- /dev/null +++ b/modules/nf-core/gatk4/splitncigarreads/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/splitncigarreads: + - "modules/nf-core/gatk4/splitncigarreads/**" diff --git a/modules/nf-core/gatk4/variantfiltration/environment.yml b/modules/nf-core/gatk4/variantfiltration/environment.yml new file mode 100644 index 00000000..0812b6f2 --- /dev/null +++ b/modules/nf-core/gatk4/variantfiltration/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_variantfiltration +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/variantfiltration/main.nf b/modules/nf-core/gatk4/variantfiltration/main.nf new file mode 100644 index 00000000..388c60ab --- /dev/null +++ b/modules/nf-core/gatk4/variantfiltration/main.nf @@ -0,0 +1,59 @@ +process GATK4_VARIANTFILTRATION { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dict) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK VariantFiltration] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + VariantFiltration \\ + --variant $vcf \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/variantfiltration/meta.yml b/modules/nf-core/gatk4/variantfiltration/meta.yml new file mode 100644 index 00000000..11915a94 --- /dev/null +++ b/modules/nf-core/gatk4/variantfiltration/meta.yml @@ -0,0 +1,77 @@ +name: gatk4_variantfiltration +description: Filter variants +keywords: + - filter + - gatk4 + - variantfiltration + - vcf +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and 
genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - vcf: + type: list + description: List of VCF(.gz) files + pattern: "*.{vcf,vcf.gz}" + - vcf_tbi: + type: list + description: List of VCF file indexes + pattern: "*.{idx,tbi}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Fasta file of reference genome + pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of fasta file + pattern: "*.fasta.fai" + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - dict: + type: file + description: Sequence dictionary of fasta file + pattern: "*.dict" +output: + - vcf: + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + - tbi: + type: file + description: Index of VCF file + pattern: "*.vcf.gz.tbi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@ramprasadn" +maintainers: + - "@kevinmenden" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/variantfiltration/tests/main.nf.test b/modules/nf-core/gatk4/variantfiltration/tests/main.nf.test new file mode 100644 index 00000000..36b7438d --- /dev/null +++ b/modules/nf-core/gatk4/variantfiltration/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_process { + + name "Test Process GATK4_VARIANTFILTRATION" + script "../main.nf" + process "GATK4_VARIANTFILTRATION" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/variantfiltration" + + test("test_gatk4_variantfiltration_vcf_input") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.idx', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions, + file(process.out.vcf.get(0).get(1)).name, +
file(process.out.tbi.get(0).get(1)).name).match() }, + ) + } + + } + + test("test_gatk4_variantfiltration_gz_input") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions, + file(process.out.vcf.get(0).get(1)).name, + file(process.out.tbi.get(0).get(1)).name).match() }, + ) + } + + } + +} + diff --git a/modules/nf-core/gatk4/variantfiltration/tests/main.nf.test.snap b/modules/nf-core/gatk4/variantfiltration/tests/main.nf.test.snap new file mode 100644 index 00000000..36c03f7c --- /dev/null +++ b/modules/nf-core/gatk4/variantfiltration/tests/main.nf.test.snap @@ -0,0 +1,30 @@ +{ + "test_gatk4_variantfiltration_gz_input": { + "content": [ + [ + "versions.yml:md5,96943659275ba62de1f0d283a2f6e97b" + ], + "test.vcf.gz", + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T22:43:27.609784" + }, + "test_gatk4_variantfiltration_vcf_input": { + "content": [ + [ + "versions.yml:md5,96943659275ba62de1f0d283a2f6e97b" + ], + "test.vcf.gz", + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.2" + }, + "timestamp": "2024-05-22T22:42:55.316294" + 
} +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/variantfiltration/tests/tags.yml b/modules/nf-core/gatk4/variantfiltration/tests/tags.yml new file mode 100644 index 00000000..4818037d --- /dev/null +++ b/modules/nf-core/gatk4/variantfiltration/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/variantfiltration: + - "modules/nf-core/gatk4/variantfiltration/**" diff --git a/modules/nf-core/gffread/environment.yml b/modules/nf-core/gffread/environment.yml new file mode 100644 index 00000000..c6df58ad --- /dev/null +++ b/modules/nf-core/gffread/environment.yml @@ -0,0 +1,7 @@ +name: gffread +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gffread=0.12.7 diff --git a/modules/nf-core/gffread/main.nf b/modules/nf-core/gffread/main.nf new file mode 100644 index 00000000..da55cbab --- /dev/null +++ b/modules/nf-core/gffread/main.nf @@ -0,0 +1,60 @@ +process GFFREAD { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gffread:0.12.7--hdcf5f25_4' : + 'biocontainers/gffread:0.12.7--hdcf5f25_4' }" + + input: + tuple val(meta), path(gff) + path fasta + + output: + tuple val(meta), path("*.gtf") , emit: gtf , optional: true + tuple val(meta), path("*.gff3") , emit: gffread_gff , optional: true + tuple val(meta), path("*.fasta"), emit: gffread_fasta , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' ) + def fasta_arg = fasta ? "-g $fasta" : '' + def output_name = "${prefix}.${extension}" + def output = extension == "fasta" ? 
"$output_name" : "-o $output_name" + def args_sorted = args.replaceAll(/(.*)(-[wxy])(.*)/) { all, pre, param, post -> "$pre $post $param" }.trim() + // args_sorted = Move '-w', '-x', and '-y' to the end of the args string as gffread expects the file name after these parameters + if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + gffread \\ + $gff \\ + $fasta_arg \\ + $args_sorted \\ + $output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gffread: \$(gffread --version 2>&1) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' ) + def output_name = "${prefix}.${extension}" + if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch $output_name + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gffread: \$(gffread --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/gffread/meta.yml b/modules/nf-core/gffread/meta.yml new file mode 100644 index 00000000..c0602820 --- /dev/null +++ b/modules/nf-core/gffread/meta.yml @@ -0,0 +1,55 @@ +name: gffread +description: Validate, filter, convert and perform various other operations on GFF files +keywords: + - gff + - conversion + - validation +tools: + - gffread: + description: GFF/GTF utility providing format conversions, region filtering, FASTA sequence extraction and more. + homepage: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread + documentation: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread + tool_dev_url: https://github.com/gpertea/gffread + doi: 10.12688/f1000research.23297.1 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. 
[ id:'test' ] + - gff: + type: file + description: A reference file in either the GFF3, GFF2 or GTF format. + pattern: "*.{gff,gtf}" + - fasta: + type: file + description: A multi-fasta file with the genomic sequences + pattern: "*.{fasta,fa,faa,fas,fsa}" +output: + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test' ] + - gtf: + type: file + description: GTF file resulting from the conversion of the GFF input file if '-T' argument is present + pattern: "*.{gtf}" + - gffread_gff: + type: file + description: GFF3 file resulting from the conversion of the GFF input file if '-T' argument is absent + pattern: "*.gff3" + - gffread_fasta: + type: file + description: Fasta file produced when either of '-w', '-x', '-y' parameters is present + pattern: "*.fasta" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@edmundmiller" +maintainers: + - "@edmundmiller" + - "@gallvp" diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test new file mode 100644 index 00000000..4cd13dcd --- /dev/null +++ b/modules/nf-core/gffread/tests/main.nf.test @@ -0,0 +1,223 @@ +nextflow_process { + + name "Test Process GFFREAD" + script "../main.nf" + process "GFFREAD" + + tag "gffread" + tag "modules_nfcore" + tag "modules" + + test("sarscov2-gff3-gtf") { + + config "./nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gffread_gff == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-gtf-stub") { + + options '-stub' + config "./nextflow.config" + + when { + params { + outdir = "$outputDir" + }
+ process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gffread_gff == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-gff3") { + + config "./nextflow-gff3.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-gff3-stub") { + + options '-stub' + config "./nextflow-gff3.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-fasta") { + + config "./nextflow-fasta.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_gff == [] } + 
) + } + + } + + test("sarscov2-gff3-fasta-stub") { + + options '-stub' + config "./nextflow-fasta.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_gff == [] } + ) + } + + } + + test("sarscov2-gff3-fasta-fail-catch") { + + options '-stub' + config "./nextflow-fasta.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'genome'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert ! 
process.success }, + { assert process.stdout.toString().contains("Input and output names are the same") } + ) + } + + } + +} diff --git a/modules/nf-core/gffread/tests/main.nf.test.snap b/modules/nf-core/gffread/tests/main.nf.test.snap new file mode 100644 index 00000000..15262320 --- /dev/null +++ b/modules/nf-core/gffread/tests/main.nf.test.snap @@ -0,0 +1,272 @@ +{ + "sarscov2-gff3-gtf": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gtf:md5,1ea0ae98d3388e0576407dc4a24ef428" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + + ], + "gtf": [ + [ + { + "id": "test" + }, + "test.gtf:md5,1ea0ae98d3388e0576407dc4a24ef428" + ] + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T10:48:56.496187" + }, + "sarscov2-gff3-gff3": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + [ + { + "id": "test" + }, + "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91" + ] + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T10:49:00.892782" + }, + "sarscov2-gff3-gtf-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + + ], + "gtf": [ + [ + { + "id": "test" + }, + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + 
"versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T11:11:26.975666" + }, + "sarscov2-gff3-fasta-stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gffread_gff": [ + + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T11:11:44.34792" + }, + "sarscov2-gff3-gff3-stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T11:11:35.221671" + }, + "sarscov2-gff3-fasta": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a" + ] + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + [ + { + "id": "test" + }, + "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a" + ] + ], + "gffread_gff": [ + + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T10:54:02.88143" + } +} \ No newline at end of file diff 
--git a/modules/nf-core/gffread/tests/nextflow-fasta.config b/modules/nf-core/gffread/tests/nextflow-fasta.config new file mode 100644 index 00000000..ac6cb148 --- /dev/null +++ b/modules/nf-core/gffread/tests/nextflow-fasta.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '-w -S' + } +} diff --git a/modules/nf-core/gffread/tests/nextflow-gff3.config b/modules/nf-core/gffread/tests/nextflow-gff3.config new file mode 100644 index 00000000..afe0830e --- /dev/null +++ b/modules/nf-core/gffread/tests/nextflow-gff3.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '' + } +} diff --git a/modules/nf-core/gffread/tests/nextflow.config b/modules/nf-core/gffread/tests/nextflow.config new file mode 100644 index 00000000..74b25094 --- /dev/null +++ b/modules/nf-core/gffread/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '-T' + } +} diff --git a/modules/nf-core/gffread/tests/tags.yml b/modules/nf-core/gffread/tests/tags.yml new file mode 100644 index 00000000..05576065 --- /dev/null +++ b/modules/nf-core/gffread/tests/tags.yml @@ -0,0 +1,2 @@ +gffread: + - modules/nf-core/gffread/** diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 00000000..25910b34 --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,7 @@ +name: gunzip +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf new file mode 100644 index 00000000..468a6f28 --- /dev/null +++ b/modules/nf-core/gunzip/main.nf @@ -0,0 +1,48 @@ +process GUNZIP { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$gunzip"), emit: gunzip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + gunzip = archive.toString() - '.gz' + """ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ + $args \\ + $archive \\ + > $gunzip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + gunzip = archive.toString() - '.gz' + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml new file mode 100644 index 00000000..231034f2 --- /dev/null +++ b/modules/nf-core/gunzip/meta.yml @@ -0,0 +1,39 @@ +name: gunzip +description: Compresses and decompresses files. +keywords: + - gunzip + - compression + - decompression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. + documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. 
[ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test new file mode 100644 index 00000000..6406008e --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap new file mode 100644 index 00000000..720fd9ff --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "timestamp": "2023-10-17T15:35:37.690477896" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/tags.yml 
b/modules/nf-core/gunzip/tests/tags.yml new file mode 100644 index 00000000..fd3f6915 --- /dev/null +++ b/modules/nf-core/gunzip/tests/tags.yml @@ -0,0 +1,2 @@ +gunzip: + - modules/nf-core/gunzip/** diff --git a/modules/nf-core/mosdepth/environment.yml b/modules/nf-core/mosdepth/environment.yml new file mode 100644 index 00000000..bcb9d64a --- /dev/null +++ b/modules/nf-core/mosdepth/environment.yml @@ -0,0 +1,8 @@ +name: mosdepth +channels: + - conda-forge + - bioconda + - defaults +dependencies: + # renovate: datasource=conda depName=bioconda/mosdepth + - mosdepth=0.3.8 diff --git a/modules/nf-core/mosdepth/main.nf b/modules/nf-core/mosdepth/main.nf new file mode 100644 index 00000000..6f4a8383 --- /dev/null +++ b/modules/nf-core/mosdepth/main.nf @@ -0,0 +1,80 @@ +process MOSDEPTH { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mosdepth:0.3.8--hd299d5a_0' : + 'biocontainers/mosdepth:0.3.8--hd299d5a_0'}" + + input: + tuple val(meta), path(bam), path(bai), path(bed) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path('*.global.dist.txt') , emit: global_txt + tuple val(meta), path('*.summary.txt') , emit: summary_txt + tuple val(meta), path('*.region.dist.txt') , optional:true, emit: regions_txt + tuple val(meta), path('*.per-base.d4') , optional:true, emit: per_base_d4 + tuple val(meta), path('*.per-base.bed.gz') , optional:true, emit: per_base_bed + tuple val(meta), path('*.per-base.bed.gz.csi') , optional:true, emit: per_base_csi + tuple val(meta), path('*.regions.bed.gz') , optional:true, emit: regions_bed + tuple val(meta), path('*.regions.bed.gz.csi') , optional:true, emit: regions_csi + tuple val(meta), path('*.quantized.bed.gz') , optional:true, emit: quantized_bed + tuple val(meta), path('*.quantized.bed.gz.csi') , optional:true, emit: quantized_csi + 
tuple val(meta), path('*.thresholds.bed.gz') , optional:true, emit: thresholds_bed + tuple val(meta), path('*.thresholds.bed.gz.csi'), optional:true, emit: thresholds_csi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--fasta ${fasta}" : "" + def interval = bed ? "--by ${bed}" : "" + if (bed && args.contains("--by")) { + error "'--by' can only be specified once when running mosdepth! Either remove input BED file definition or remove '--by' from 'ext.args' definition" + } + if (!bed && args.contains("--thresholds")) { + error "'--thresholds' can only be specified in conjunction with '--by'" + } + + """ + mosdepth \\ + --threads $task.cpus \\ + $interval \\ + $reference \\ + $args \\ + $prefix \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.global.dist.txt + touch ${prefix}.region.dist.txt + touch ${prefix}.summary.txt + touch ${prefix}.per-base.d4 + echo "" | gzip > ${prefix}.per-base.bed.gz + touch ${prefix}.per-base.bed.gz.csi + echo "" | gzip > ${prefix}.regions.bed.gz + touch ${prefix}.regions.bed.gz.csi + echo "" | gzip > ${prefix}.quantized.bed.gz + touch ${prefix}.quantized.bed.gz.csi + echo "" | gzip > ${prefix}.thresholds.bed.gz + touch ${prefix}.thresholds.bed.gz.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/mosdepth/meta.yml b/modules/nf-core/mosdepth/meta.yml new file mode 100644 index 00000000..9caaf2cd --- /dev/null +++ b/modules/nf-core/mosdepth/meta.yml @@ -0,0 +1,109 @@ +name: mosdepth +description: Calculates genome-wide sequencing coverage. 
+keywords: + - mosdepth + - bam + - cram + - coverage +tools: + - mosdepth: + description: | + Fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. + documentation: https://github.com/brentp/mosdepth + doi: 10.1093/bioinformatics/btx699 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Input BAM/CRAM file + pattern: "*.{bam,cram}" + - bai: + type: file + description: Index for BAM/CRAM file + pattern: "*.{bai,crai}" + - bed: + type: file + description: BED file with intersected intervals + pattern: "*.{bed}" + - meta2: + type: map + description: | + Groovy Map containing bed information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - global_txt: + type: file + description: Text file with global cumulative coverage distribution + pattern: "*.{global.dist.txt}" + - regions_txt: + type: file + description: Text file with region cumulative coverage distribution + pattern: "*.{region.dist.txt}" + - summary_txt: + type: file + description: Text file with summary mean depths per chromosome and regions + pattern: "*.{summary.txt}" + - per_base_bed: + type: file + description: BED file with per-base coverage + pattern: "*.{per-base.bed.gz}" + - per_base_csi: + type: file + description: Index file for BED file with per-base coverage + pattern: "*.{per-base.bed.gz.csi}" + - per_base_d4: + type: file + description: D4 file with per-base coverage + pattern: "*.{per-base.d4}" + - regions_bed: + type: file + description: BED file with per-region coverage + pattern: "*.{regions.bed.gz}" + - regions_csi: + type: file + description: Index file for BED file with per-region coverage + pattern: "*.{regions.bed.gz.csi}" + - quantized_bed: 
+ type: file + description: BED file with binned coverage + pattern: "*.{quantized.bed.gz}" + - quantized_csi: + type: file + description: Index file for BED file with binned coverage + pattern: "*.{quantized.bed.gz.csi}" + - thresholds_bed: + type: file + description: BED file with the number of bases in each region that are covered at or above each threshold + pattern: "*.{thresholds.bed.gz}" + - thresholds_csi: + type: file + description: Index file for BED file with threshold coverage + pattern: "*.{thresholds.bed.gz.csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@ramprasadn" + - "@matthdsm" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@ramprasadn" + - "@matthdsm" diff --git a/modules/nf-core/mosdepth/tests/main.nf.test b/modules/nf-core/mosdepth/tests/main.nf.test new file mode 100644 index 00000000..21eebc1f --- /dev/null +++ b/modules/nf-core/mosdepth/tests/main.nf.test @@ -0,0 +1,246 @@ +nextflow_process { + + name "Test Process MOSDEPTH" + script "../main.nf" + process "MOSDEPTH" + + tag "modules" + tag "modules_nfcore" + tag "mosdepth" + + test("homo_sapiens - bam, bai, []") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam, bai, bed") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], 
checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - cram, crai, []") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [] + ] + input[1] = [ + [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - cram, crai, bed") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + input[1] = [ + [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam, bai, [] - window") { + + config "./window.config" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success 
}, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam, bai, [] - quantized") { + + config "./quantized.config" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam, bai, bed - thresholds") { + + config "./threshold.config" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam, bai, bed - fail") { + + config "./window.config" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + + test("homo_sapiens - bam, bai, [] - stub") { + + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], 
checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/mosdepth/tests/main.nf.test.snap b/modules/nf-core/mosdepth/tests/main.nf.test.snap new file mode 100644 index 00000000..c604540b --- /dev/null +++ b/modules/nf-core/mosdepth/tests/main.nf.test.snap @@ -0,0 +1,1386 @@ +{ + "homo_sapiens - bam, bai, [] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.global.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.region.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.d4:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.regions.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.global.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "per_base_d4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.d4:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "quantized_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "quantized_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.region.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "thresholds_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "thresholds_csi": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.thresholds.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T13:33:16.953408231" + }, + "homo_sapiens - cram, crai, bed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,e7df086f0a36e88ca231e143d43bd3f9" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true 
+ }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,e7df086f0a36e88ca231e143d43bd3f9" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T13:32:50.160217828" + }, + "homo_sapiens - bam, bai, [] - quantized": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,f037c215449d361112efc10108fcc17c" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,4f69e6ace50206a2768be66ded3a56f0" + ] + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + 
"single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,f037c215449d361112efc10108fcc17c" + ] + ], + "quantized_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,4f69e6ace50206a2768be66ded3a56f0" + ] + ], + "regions_bed": [ + + ], + "regions_csi": [ + + ], + "regions_txt": [ + + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T13:33:01.164885111" + }, + "homo_sapiens - bam, bai, bed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,e7df086f0a36e88ca231e143d43bd3f9" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,e7df086f0a36e88ca231e143d43bd3f9" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T13:32:39.071657456" + }, + "homo_sapiens - bam, bai, [] - window": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + 
"versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,0b6ea9f0da1228252d9aef2d3b6f7f76" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,34f48d16fcdd61e44d812e29e02c77b8" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,2a30bcb7f5c7632136b3efce24723970" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,34f48d16fcdd61e44d812e29e02c77b8" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,2a30bcb7f5c7632136b3efce24723970" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,0b6ea9f0da1228252d9aef2d3b6f7f76" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ] + } + 
], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T13:32:55.631776118" + }, + "homo_sapiens - bam, bai, []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + + ], + "regions_csi": [ + + ], + "regions_txt": [ + + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T13:32:33.642125299" + }, + "homo_sapiens - cram, crai, []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + 
"single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + + ], + "regions_csi": [ + + ], + "regions_txt": [ + + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T13:32:44.704920941" + }, + "homo_sapiens - bam, bai, bed - thresholds": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,fe70ae728cd10726c42a2bcd44adfc9d" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,219414a0751185adb98d2235d83ea055" + ] + ], + "12": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,e7df086f0a36e88ca231e143d43bd3f9" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,e7df086f0a36e88ca231e143d43bd3f9" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,fe70ae728cd10726c42a2bcd44adfc9d" + ] + ], + "thresholds_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,219414a0751185adb98d2235d83ea055" + ] + ], + "versions": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T13:33:06.737266831" + } +} \ No newline at end of file diff --git a/modules/nf-core/mosdepth/tests/quantized.config b/modules/nf-core/mosdepth/tests/quantized.config new file mode 100644 index 00000000..63c55350 --- /dev/null +++ b/modules/nf-core/mosdepth/tests/quantized.config @@ -0,0 +1,3 @@ +process { + ext.args = "--quantize 0:1:4:100:200" +} \ No newline at end of file diff --git a/modules/nf-core/mosdepth/tests/tags.yml b/modules/nf-core/mosdepth/tests/tags.yml new file mode 100644 index 00000000..5cd2e08e --- /dev/null +++ b/modules/nf-core/mosdepth/tests/tags.yml @@ -0,0 +1,2 @@ +mosdepth: + - "modules/nf-core/mosdepth/**" diff --git a/modules/nf-core/mosdepth/tests/threshold.config b/modules/nf-core/mosdepth/tests/threshold.config new file mode 100644 index 00000000..9b014ddf --- /dev/null +++ b/modules/nf-core/mosdepth/tests/threshold.config @@ -0,0 +1,3 @@ +process { + ext.args = "--thresholds 1,10,20,30" +} \ No newline at end of file diff --git a/modules/nf-core/mosdepth/tests/window.config b/modules/nf-core/mosdepth/tests/window.config new file mode 100644 index 00000000..7a0f755c --- /dev/null +++ b/modules/nf-core/mosdepth/tests/window.config @@ -0,0 +1,3 @@ +process { + ext.args = "--by 100" +} \ No newline at end of file diff --git 
a/modules/nf-core/picard/markduplicates/environment.yml b/modules/nf-core/picard/markduplicates/environment.yml new file mode 100644 index 00000000..58b795f5 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/environment.yml @@ -0,0 +1,7 @@ +name: picard_markduplicates +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::picard=3.1.1 diff --git a/modules/nf-core/picard/markduplicates/main.nf b/modules/nf-core/picard/markduplicates/main.nf new file mode 100644 index 00000000..ad0b2963 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/main.nf @@ -0,0 +1,68 @@ +process PICARD_MARKDUPLICATES { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:3.1.1--hdfd78af_0' : + 'biocontainers/picard:3.1.1--hdfd78af_0' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: + tuple val(meta), path("*.bam") , emit: bam, optional: true + tuple val(meta), path("*.bai") , emit: bai, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.metrics.txt"), emit: metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "${reads.getExtension()}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + def avail_mem = 3072 + if (!task.memory) { + log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + + if ("$reads" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ + """ + picard \\ + -Xmx${avail_mem}M \\ + MarkDuplicates \\ + $args \\ + --INPUT $reads \\ + --OUTPUT ${prefix}.${suffix} \\ + $reference \\ + --METRICS_FILE ${prefix}.MarkDuplicates.metrics.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "${reads.getExtension()}" + if ("$reads" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch ${prefix}.${suffix} + touch ${prefix}.MarkDuplicates.metrics.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/nf-core/picard/markduplicates/meta.yml b/modules/nf-core/picard/markduplicates/meta.yml new file mode 100644 index 00000000..1f0ffe16 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/meta.yml @@ -0,0 +1,79 @@ +name: picard_markduplicates +description: Locate and tag duplicate reads in a BAM file +keywords: + - markduplicates + - pcr + - duplicates + - bam + - sam + - cram +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://broadinstitute.github.io/picard/ + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: Sequence reads file, can be SAM/BAM/CRAM format + pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fasta: + type: file + description: Reference genome fasta file, required for CRAM input + pattern: "*.{fasta,fa}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Reference genome fasta index + pattern: "*.{fai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file with duplicate reads marked/removed + pattern: "*.{bam}" + - bai: + type: file + description: An optional BAM index file. If desired, --CREATE_INDEX must be passed as a flag + pattern: "*.{bai}" + - cram: + type: file + description: Output CRAM file + pattern: "*.{cram}" + - metrics: + type: file + description: Duplicate metrics file generated by picard + pattern: "*.{metrics.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@projectoriented" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/picard/markduplicates/tests/main.nf.test b/modules/nf-core/picard/markduplicates/tests/main.nf.test new file mode 100644 index 00000000..e3e97f6c --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/main.nf.test @@ -0,0 +1,92 @@ +nextflow_process { + + name "Test Process PICARD_MARKDUPLICATES" + script "../main.nf" + process "PICARD_MARKDUPLICATES" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "picard" + tag "picard/markduplicates" + + test("sarscov2 [unsorted bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = [ [:], [] ] + input[2] = [ [:], [] ] + """ + } + } + + then { + assertAll( + { assert 
process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("unsorted_bam_name") }, + { assert snapshot(path(process.out.metrics.get(0).get(1)).readLines()[0..2]).match("unsorted_bam_metrics") }, + { assert snapshot(process.out.versions).match("unsorted_bam_versions") } + ) + } + } + + test("sarscov2 [sorted bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + input[1] = [ [:], [] ] + input[2] = [ [:], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("sorted_bam_name") }, + { assert snapshot(path(process.out.metrics.get(0).get(1)).readLines()[0..2]).match("sorted_bam_metrics") }, + { assert snapshot(process.out.versions).match("sorted_bam_versions") } + ) + } + } + + test("homo_sapiens [cram]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("cram_name") }, + { assert snapshot(path(process.out.metrics.get(0).get(1)).readLines()[0..2]).match("cram_metrics") }, + { assert snapshot(process.out.versions).match("cram_versions") } + ) + } + } +} diff --git a/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap 
b/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap new file mode 100644 index 00000000..eb17111e --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap @@ -0,0 +1,110 @@ +{ + "sorted_bam_versions": { + "content": [ + [ + "versions.yml:md5,b699af51b1956f3810f8a7c066e0ab17" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T15:31:50.928021" + }, + "unsorted_bam_name": { + "content": [ + "test.marked.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-19T10:26:28.100755" + }, + "cram_metrics": { + "content": [ + [ + "## htsjdk.samtools.metrics.StringHeader", + "# MarkDuplicates --INPUT test.paired_end.sorted.cram --OUTPUT test.marked.cram --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "## htsjdk.samtools.metrics.StringHeader" + ] + ], + 
"meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T15:25:47.518152" + }, + "sorted_bam_metrics": { + "content": [ + [ + "## htsjdk.samtools.metrics.StringHeader", + "# MarkDuplicates --INPUT test.paired_end.sorted.bam --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "## htsjdk.samtools.metrics.StringHeader" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-21T11:39:10.318331" + }, + "cram_name": { + "content": [ + "test.marked.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T15:25:47.459663" + }, + "cram_versions": { + "content": [ + [ + "versions.yml:md5,b699af51b1956f3810f8a7c066e0ab17" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-19T10:27:03.26989" + }, + "unsorted_bam_versions": { + 
"content": [ + [ + "versions.yml:md5,b699af51b1956f3810f8a7c066e0ab17" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T15:31:24.040403" + }, + "unsorted_bam_metrics": { + "content": [ + [ + "## htsjdk.samtools.metrics.StringHeader", + "# MarkDuplicates --INPUT test.paired_end.bam --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "## htsjdk.samtools.metrics.StringHeader" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-21T10:51:12.831787" + }, + "sorted_bam_name": { + "content": [ + "test.marked.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-19T10:26:45.080116" + } +} \ No newline at end of file diff --git a/modules/nf-core/picard/markduplicates/tests/nextflow.config b/modules/nf-core/picard/markduplicates/tests/nextflow.config new 
file mode 100644 index 00000000..02818dd6 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: PICARD_MARKDUPLICATES { + ext.prefix = { "${meta.id}.marked" } + ext.args = '--ASSUME_SORT_ORDER queryname' + } +} diff --git a/modules/nf-core/picard/markduplicates/tests/tags.yml b/modules/nf-core/picard/markduplicates/tests/tags.yml new file mode 100644 index 00000000..4f213d62 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/tags.yml @@ -0,0 +1,2 @@ +picard/markduplicates: + - modules/nf-core/picard/markduplicates/** diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml new file mode 100644 index 00000000..9c24eb0a --- /dev/null +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -0,0 +1,10 @@ +name: samtools_faidx + +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - bioconda::htslib=1.19.1 + - bioconda::samtools=1.19.2 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf new file mode 100644 index 00000000..cfe7ad95 --- /dev/null +++ b/modules/nf-core/samtools/faidx/main.nf @@ -0,0 +1,50 @@ +process SAMTOOLS_FAIDX { + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(fai) + + output: + tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + faidx \\ + $fasta \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() + def fastacmd = match[0] ? "touch ${match[0][1]}" : '' + """ + ${fastacmd} + touch ${fasta}.fai + + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml new file mode 100644 index 00000000..f3c25de2 --- /dev/null +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -0,0 +1,65 @@ +name: samtools_faidx +description: Index FASTA file +keywords: + - index + - fasta + - faidx +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fa: + type: file + description: FASTA file + pattern: "*.{fa}" + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@phue" +maintainers: + - "@drpatelh" + - "@ewels" + - "@phue" diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test b/modules/nf-core/samtools/faidx/tests/main.nf.test new file mode 100644 index 00000000..17244ef2 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test @@ -0,0 +1,122 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FAIDX" + script "../main.nf" + process "SAMTOOLS_FAIDX" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/faidx" + + test("test_samtools_faidx") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_bgzip") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + 
{ assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_stub_fasta") { + + config "./nextflow2.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_stub_fai") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap new file mode 100644 index 00000000..3e651ef6 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap @@ -0,0 +1,249 @@ +{ + "test_samtools_faidx": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + 
"genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T16:22:39.412601" + }, + "test_samtools_faidx_bgzip": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "3": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "versions": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T16:23:22.427966" + }, + "test_samtools_faidx_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": 
"2024-03-18T16:24:04.107537" + }, + "test_samtools_faidx_stub_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T16:24:45.868463" + }, + "test_samtools_faidx_stub_fai": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T16:25:27.550554" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/nextflow.config b/modules/nf-core/samtools/faidx/tests/nextflow.config new file mode 100644 index 00000000..f76a3ba0 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = 'MT192765.1 -o extract.fa' + } + +} diff --git a/modules/nf-core/samtools/faidx/tests/nextflow2.config b/modules/nf-core/samtools/faidx/tests/nextflow2.config new file mode 100644 index 00000000..33ebbd5d --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow2.config @@ -0,0 +1,6 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = 
'-o extract.fa' + } +} diff --git a/modules/nf-core/samtools/faidx/tests/tags.yml b/modules/nf-core/samtools/faidx/tests/tags.yml new file mode 100644 index 00000000..e4a83948 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/faidx: + - modules/nf-core/samtools/faidx/** diff --git a/modules/nf-core/samtools/flagstat/environment.yml b/modules/nf-core/samtools/flagstat/environment.yml new file mode 100644 index 00000000..bd57cb54 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/environment.yml @@ -0,0 +1,8 @@ +name: samtools_flagstat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf new file mode 100644 index 00000000..eb5f5252 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -0,0 +1,46 @@ +process SAMTOOLS_FLAGSTAT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.flagstat"), emit: flagstat + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + samtools \\ + flagstat \\ + --threads ${task.cpus} \\ + $bam \\ + > ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml new file mode 100644 index 00000000..97991358 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -0,0 +1,51 @@ +name: samtools_flagstat +description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type +keywords: + - stats + - mapping + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test b/modules/nf-core/samtools/flagstat/tests/main.nf.test new file mode 100644 index 00000000..24c3c04b --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FLAGSTAT" + script "../main.nf" + process "SAMTOOLS_FLAGSTAT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/flagstat" + + test("BAM") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.flagstat).match("flagstat") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap new file mode 100644 index 00000000..a76fc27e --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap @@ -0,0 +1,32 @@ +{ + "flagstat": { + "content": [ + [ 
+ [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:31:37.783927" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,fd0030ce49ab3a92091ad80260226452" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:11:44.299617452" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/flagstat/tests/tags.yml b/modules/nf-core/samtools/flagstat/tests/tags.yml new file mode 100644 index 00000000..2d2b7255 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/flagstat: + - modules/nf-core/samtools/flagstat/** diff --git a/modules/nf-core/samtools/idxstats/environment.yml b/modules/nf-core/samtools/idxstats/environment.yml new file mode 100644 index 00000000..174973b8 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/environment.yml @@ -0,0 +1,8 @@ +name: samtools_idxstats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf new file mode 100644 index 00000000..a544026f --- /dev/null +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_IDXSTATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.idxstats"), emit: idxstats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + samtools \\ + idxstats \\ + --threads ${task.cpus-1} \\ + $bam \\ + > ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml new file mode 100644 index 00000000..344e92a3 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -0,0 +1,52 @@ +name: samtools_idxstats +description: Reports alignment summary statistics for a BAM/CRAM/SAM file +keywords: + - stats + - mapping + - counts + - chromosome + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test b/modules/nf-core/samtools/idxstats/tests/main.nf.test new file mode 100644 index 00000000..a2dcb27c --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process SAMTOOLS_IDXSTATS" + script "../main.nf" + process "SAMTOOLS_IDXSTATS" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/idxstats" + + test("bam") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.idxstats).match("idxstats") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap new file mode 100644 index 00000000..a7050bdc --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap @@ -0,0 +1,32 @@ +{ + "versions": { + "content": [ + [ 
+ "versions.yml:md5,613dde56f108418039ffcdeeddba397a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:50.147462763" + }, + "idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:36:41.561026" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/idxstats/tests/tags.yml b/modules/nf-core/samtools/idxstats/tests/tags.yml new file mode 100644 index 00000000..d3057c61 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/idxstats: + - modules/nf-core/samtools/idxstats/** diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 00000000..a5e50649 --- /dev/null +++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,8 @@ +name: samtools_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 00000000..dc14f98d --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + + input: + tuple val(meta), path(input) + + output: // every index channel is declared optional:true + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' // extra `samtools index` options, passed through verbatim + """ + samtools \\ + index \\ + -@ ${task.cpus-1} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def extension = input.getExtension() == 'cram' ? 'crai' : args.contains('-c') ? 'csi' : 'bai' // one placeholder per run: CRAM -> .crai, -c (also matches --csi) -> .csi, otherwise .bai + """ + touch ${input}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 00000000..01a4ee03 --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,57 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + type: file + description: CSI index file + pattern: "*.{csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 00000000..0ed260ef --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 00000000..bb7756d1 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("bai") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.bai).match("bai") }, + { assert snapshot(process.out.versions).match("bai_versions") } + ) + } + } + + test("crai") 
{ + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.crai).match("crai") }, + { assert snapshot(process.out.versions).match("crai_versions") } + ) + } + } + + test("csi") { + + config "./csi.nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert path(process.out.csi.get(0).get(1)).exists() }, + { assert snapshot(process.out.versions).match("csi_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 00000000..3dc8e7de --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,74 @@ +{ + "crai_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:00.324667957" + }, + "csi_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:07.885103162" + }, + "crai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": 
"2024-02-12T18:41:38.446424" + }, + "bai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:40:46.579747" + }, + "bai_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:11:51.641425452" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml new file mode 100644 index 00000000..e0f58a7a --- /dev/null +++ b/modules/nf-core/samtools/index/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/index: + - modules/nf-core/samtools/index/** diff --git a/modules/nf-core/samtools/merge/environment.yml b/modules/nf-core/samtools/merge/environment.yml new file mode 100644 index 00000000..fc669b1b --- /dev/null +++ b/modules/nf-core/samtools/merge/environment.yml @@ -0,0 +1,8 @@ +name: samtools_merge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf new file mode 100644 index 00000000..a3048c28 --- /dev/null +++ b/modules/nf-core/samtools/merge/main.nf @@ -0,0 +1,61 @@ +process SAMTOOLS_MERGE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + + input: + tuple val(meta), path(input_files, stageAs: "?/*") + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: // bam/cram are matched by exact name, so they depend on the `prefix` set in script:/stub: + tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam + tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai") , optional:true, emit: crai + path "versions.yml" , emit: versions + + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" // assigned without `def`; the output: declarations above reference ${prefix} + def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() // output format follows the (first) input file + def reference = fasta ? "--reference ${fasta}" : "" + """ + samtools \\ + merge \\ + --threads ${task.cpus-1} \\ + $args \\ + ${reference} \\ + ${prefix}.${file_type} \\ + $input_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" // same naming rule as script:, so stub outputs carry the names a real run would + def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() + def index_type = file_type == "bam" ? "csi" : "crai" + def index = args.contains("--write-index") ? 
"touch ${prefix}.${index_type}" : "" + """ + touch ${prefix}.${file_type} + ${index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml new file mode 100644 index 00000000..2e8f3dbb --- /dev/null +++ b/modules/nf-core/samtools/merge/meta.yml @@ -0,0 +1,83 @@ +name: samtools_merge +description: Merge BAM or CRAM file +keywords: + - merge + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_files: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of the reference file the CRAM was created with (optional) + pattern: "*.fai" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - cram: + type: file + description: CRAM file + pattern: "*.{cram}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - csi: + type: file + description: BAM index file (optional) + pattern: "*.csi" + - crai: + type: file + description: CRAM index file (optional) + pattern: "*.crai" +authors: + - "@drpatelh" + - "@yuukiiwa " + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@yuukiiwa " + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/merge/tests/index.config b/modules/nf-core/samtools/merge/tests/index.config new file mode 100644 index 00000000..8c5668cf --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test b/modules/nf-core/samtools/merge/tests/main.nf.test new file mode 100644 index 00000000..40b36e82 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test @@ -0,0 +1,137 @@ +nextflow_process { + + name "Test Process SAMTOOLS_MERGE" + script "../main.nf" + process "SAMTOOLS_MERGE" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/merge" + + test("bams") { + + config "./index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + """ + 
} + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bams_bam") }, + { assert snapshot(process.out.cram).match("bams_cram") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bams_csi") }, + { assert snapshot(process.out.crai).match("bams_crai") }, + { assert snapshot(process.out.versions).match("bams_versions") } + ) + } + } + + test("crams") { + + config "./index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("crams_cram") }, + { assert snapshot(process.out.bam).match("crams_bam") }, + { assert snapshot(file(process.out.crai[0][1]).name).match("crams_crai") }, + { assert snapshot(process.out.csi).match("crams_csi") }, + { assert snapshot(process.out.versions).match("crams_versions") } + ) + } + } + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success 
}, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("bams_stub") { + + config "./index.config" + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bams_stub_bam") }, + { assert snapshot(process.out.cram).match("bams_stub_cram") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bams_stub_csi") }, + { assert snapshot(process.out.crai).match("bams_stub_crai") }, + { assert snapshot(process.out.versions).match("bams_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test.snap b/modules/nf-core/samtools/merge/tests/main.nf.test.snap new file mode 100644 index 00000000..f7da7699 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test.snap @@ -0,0 +1,228 @@ +{ + "crams_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.647389" + }, + "bams_stub_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.937013" + }, + "bams_crai": { + 
"content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.928616" + }, + "bams_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.923289" + }, + "bams_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.925716" + }, + "crams_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.655959" + }, + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.319539" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,52c62d4712f7af00eb962d090ca32fe4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:33.782637377" + }, + "bams_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.92719" + }, + "bams_stub_csi": { + "content": [ + "test.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.940498" + }, + "bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.328852" + }, + "bams_stub_versions": { + "content": [ + [ + "versions.yml:md5,52c62d4712f7af00eb962d090ca32fe4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:42.594476052" + }, + "bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.324219" + }, + "bams_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": 
"2024-02-12T18:50:19.933153" + }, + "bams_versions": { + "content": [ + [ + "versions.yml:md5,52c62d4712f7af00eb962d090ca32fe4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:04.805335656" + }, + "crams_bam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.650652" + }, + "crams_versions": { + "content": [ + [ + "versions.yml:md5,52c62d4712f7af00eb962d090ca32fe4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:25.889394689" + }, + "bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.33292" + }, + "crams_crai": { + "content": [ + "test.cram.crai" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.653512" + }, + "bams_stub_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.943839" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/tags.yml b/modules/nf-core/samtools/merge/tests/tags.yml new file mode 100644 index 00000000..b869abcb --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/merge: + - "modules/nf-core/samtools/merge/**" diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml new file mode 100644 index 00000000..4d898e48 --- /dev/null +++ b/modules/nf-core/samtools/sort/environment.yml @@ -0,0 +1,8 @@ +name: samtools_sort +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf new file mode 100644 index 00000000..fc374f98 --- /dev/null +++ 
b/modules/nf-core/samtools/sort/main.nf @@ -0,0 +1,63 @@ +process SAMTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + + input: + tuple val(meta) , path(bam) + tuple val(meta2), path(fasta) + + output: // all alignment/index channels are optional; there is no channel for SAM output + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" // default when args carries neither "--output-fmt sam" nor "--output-fmt cram" + def reference = fasta ? "--reference ${fasta}" : "" + if ("$bam" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ + """ + samtools cat \\ + --threads $task.cpus \\ + ${bam} \\ + | \\ + samtools sort \\ + $args \\ + -T ${prefix} \\ + --threads $task.cpus \\ + ${reference} \\ + -o ${prefix}.${extension} \\ + - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: // placeholders are always named .bam / .bam.csi, whatever ext.args requests + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + touch ${prefix}.bam.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml new file mode 100644 index 00000000..341a7d0e --- /dev/null +++ b/modules/nf-core/samtools/sort/meta.yml @@ -0,0 +1,71 @@ +name: samtools_sort +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file(s) + pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta,fna}" + optional: true +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + - cram: + type: file + description: Sorted CRAM file + pattern: "*.{cram}" + - crai: + type: file + description: CRAM index file (optional) + pattern: "*.crai" + - csi: + type: file + description: BAM index file (optional) + pattern: "*.csi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@matthdsm" +maintainers: + - "@drpatelh" + - "@ewels" + - "@matthdsm" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test new file mode 100644 index 00000000..8360e2b1 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -0,0 +1,96 @@ +nextflow_process { + + name "Test Process SAMTOOLS_SORT" + script "../main.nf" + process "SAMTOOLS_SORT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/sort" + + test("bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("cram") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) 
+ """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("bam_stub") { + + config "./nextflow.config" + options "-stub" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, + { assert snapshot(process.out.versions).match("bam_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap new file mode 100644 index 00000000..38477656 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -0,0 +1,154 @@ +{ + "cram": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,bc0b7c25da26384a006ed84cc9e4da23" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,8d4e836c2fed6c0bf874d5e8cdba5831" + ] + ], + "4": [ + "versions.yml:md5,e6d43fefc9a8bff91c2ce6e3a1716eca" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,bc0b7c25da26384a006ed84cc9e4da23" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,8d4e836c2fed6c0bf874d5e8cdba5831" + ] + ], + "versions": [ + "versions.yml:md5,e6d43fefc9a8bff91c2ce6e3a1716eca" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-04T15:08:00.830294" 
+ }, + "bam_stub_bam": { + "content": [ + "test.sorted.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:21:04.364044" + }, + "bam_stub_versions": { + "content": [ + [ + "versions.yml:md5,e6d43fefc9a8bff91c2ce6e3a1716eca" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:15:00.20800281" + }, + "bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,bc0b7c25da26384a006ed84cc9e4da23" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,8d4e836c2fed6c0bf874d5e8cdba5831" + ] + ], + "4": [ + "versions.yml:md5,e6d43fefc9a8bff91c2ce6e3a1716eca" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,bc0b7c25da26384a006ed84cc9e4da23" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,8d4e836c2fed6c0bf874d5e8cdba5831" + ] + ], + "versions": [ + "versions.yml:md5,e6d43fefc9a8bff91c2ce6e3a1716eca" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-04T15:07:48.773803" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/tests/nextflow.config b/modules/nf-core/samtools/sort/tests/nextflow.config new file mode 100644 index 00000000..f642771f --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + ext.args = "--write-index" + } + +} diff --git a/modules/nf-core/samtools/sort/tests/tags.yml b/modules/nf-core/samtools/sort/tests/tags.yml new file mode 100644 index 00000000..cd63ea20 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/tags.yml @@ -0,0 +1,3 @@ +samtools/sort: + - modules/nf-core/samtools/sort/** + - 
tests/modules/nf-core/samtools/sort/** diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml new file mode 100644 index 00000000..67bb0ca4 --- /dev/null +++ b/modules/nf-core/samtools/stats/environment.yml @@ -0,0 +1,8 @@ +name: samtools_stats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf new file mode 100644 index 00000000..52b00f4b --- /dev/null +++ b/modules/nf-core/samtools/stats/main.nf @@ -0,0 +1,50 @@ +process SAMTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + + input: + tuple val(meta), path(input), path(input_index) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.stats"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' // extra `samtools stats` options; forwarded to the command below + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? 
"--reference ${fasta}" : "" + """ + samtools \\ + stats \\ + --threads ${task.cpus} \\ + $args \\ + ${reference} \\ + ${input} \\ + > ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml new file mode 100644 index 00000000..735ff812 --- /dev/null +++ b/modules/nf-core/samtools/stats/meta.yml @@ -0,0 +1,63 @@ +name: samtools_stats +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test b/modules/nf-core/samtools/stats/tests/main.nf.test new file mode 100644 index 00000000..e3d5cb14 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test @@ -0,0 +1,65 @@ +nextflow_process { + + name "Test Process SAMTOOLS_STATS" + script "../main.nf" + process "SAMTOOLS_STATS" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/stats" + + test("bam") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("cram") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } +} diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/modules/nf-core/samtools/stats/tests/main.nf.test.snap new file mode 100644 index 00000000..1b7c9ba4 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "cram": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,01812900aa4027532906c5d431114233" + ] + ], + "1": [ + "versions.yml:md5,0514ceb1769b2a88843e08c1f82624a9" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,01812900aa4027532906c5d431114233" + ] + ], + "versions": [ + "versions.yml:md5,0514ceb1769b2a88843e08c1f82624a9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:15:25.562429714" + }, + "bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,5d8681bf541199898c042bf400391d59" + ] + ], + "1": [ + "versions.yml:md5,0514ceb1769b2a88843e08c1f82624a9" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,5d8681bf541199898c042bf400391d59" + ] + ], + "versions": [ + "versions.yml:md5,0514ceb1769b2a88843e08c1f82624a9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:15:07.857611509" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/stats/tests/tags.yml b/modules/nf-core/samtools/stats/tests/tags.yml new file mode 100644 index 00000000..7c28e30f --- /dev/null +++ 
b/modules/nf-core/samtools/stats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/stats: + - modules/nf-core/samtools/stats/** diff --git a/modules/nf-core/snpeff/download/environment.yml b/modules/nf-core/snpeff/download/environment.yml new file mode 100644 index 00000000..62f3d5aa --- /dev/null +++ b/modules/nf-core/snpeff/download/environment.yml @@ -0,0 +1,7 @@ +name: snpeff_download +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::snpeff=5.1 diff --git a/modules/nf-core/snpeff/download/main.nf b/modules/nf-core/snpeff/download/main.nf new file mode 100644 index 00000000..f1fc4cc3 --- /dev/null +++ b/modules/nf-core/snpeff/download/main.nf @@ -0,0 +1,51 @@ +process SNPEFF_DOWNLOAD { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/snpeff:5.1--hdfd78af_2' : + 'biocontainers/snpeff:5.1--hdfd78af_2' }" + + input: + tuple val(meta), val(genome), val(cache_version) + + output: + tuple val(meta), path('snpeff_cache'), emit: cache + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def avail_mem = 6144 + if (!task.memory) { + log.info '[snpEff] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + snpEff \\ + -Xmx${avail_mem}M \\ + download ${genome}.${cache_version} \\ + -dataDir \${PWD}/snpeff_cache \\ + ${args} + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpeff: \$(echo \$(snpEff -version 2>&1) | cut -f 2 -d ' ') + END_VERSIONS + """ + + stub: + """ + mkdir ${genome}.${cache_version} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpeff: \$(echo \$(snpEff -version 2>&1) | cut -f 2 -d ' ') + END_VERSIONS + """ +} diff --git a/modules/nf-core/snpeff/download/meta.yml b/modules/nf-core/snpeff/download/meta.yml new file mode 100644 index 00000000..f367c696 --- /dev/null +++ b/modules/nf-core/snpeff/download/meta.yml @@ -0,0 +1,43 @@ +name: snpeff_download +description: Genetic variant annotation and functional effect prediction toolbox +keywords: + - annotation + - effect prediction + - snpeff + - variant + - vcf +tools: + - snpeff: + description: | + SnpEff is a variant annotation and effect prediction tool. + It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). + homepage: https://pcingola.github.io/SnpEff/ + documentation: https://pcingola.github.io/SnpEff/se_introduction/ + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - vcf: + type: file + description: | + vcf to annotate + - db: + type: string + description: | + which db to annotate with +output: + - cache: + type: file + description: | + snpEff cache + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/snpeff/snpeff/environment.yml b/modules/nf-core/snpeff/snpeff/environment.yml new file mode 100644 index 00000000..b492e6a8 --- /dev/null +++ b/modules/nf-core/snpeff/snpeff/environment.yml @@ -0,0 +1,7 @@ +name: snpeff_snpeff +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::snpeff=5.1 diff --git a/modules/nf-core/snpeff/snpeff/main.nf b/modules/nf-core/snpeff/snpeff/main.nf new file mode 100644 index 00000000..28d13826 --- /dev/null +++ b/modules/nf-core/snpeff/snpeff/main.nf @@ -0,0 +1,62 @@ +process SNPEFF_SNPEFF { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/snpeff:5.1--hdfd78af_2' : + 'biocontainers/snpeff:5.1--hdfd78af_2' }" + + input: + tuple val(meta), path(vcf) + val db + tuple val(meta2), path(cache) + + output: + tuple val(meta), path("*.ann.vcf"), emit: vcf + tuple val(meta), path("*.csv"), emit: report + tuple val(meta), path("*.html"), emit: summary_html + tuple val(meta), path("*.genes.txt"), emit: genes_txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def avail_mem = 6144 + if (!task.memory) { + log.info '[snpEff] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + def prefix = task.ext.prefix ?: "${meta.id}" + def cache_command = cache ? "-dataDir \${PWD}/${cache}" : "" + """ + snpEff \\ + -Xmx${avail_mem}M \\ + $db \\ + $args \\ + -csvStats ${prefix}.csv \\ + $cache_command \\ + $vcf \\ + > ${prefix}.ann.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpeff: \$(echo \$(snpEff -version 2>&1) | cut -f 2 -d ' ') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.ann.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpeff: \$(echo \$(snpEff -version 2>&1) | cut -f 2 -d ' ') + END_VERSIONS + """ + +} diff --git a/modules/nf-core/snpeff/snpeff/meta.yml b/modules/nf-core/snpeff/snpeff/meta.yml new file mode 100644 index 00000000..7559c3de --- /dev/null +++ b/modules/nf-core/snpeff/snpeff/meta.yml @@ -0,0 +1,60 @@ +name: snpeff_snpeff +description: Genetic variant annotation and functional effect prediction toolbox +keywords: + - annotation + - effect prediction + - snpeff + - variant + - vcf +tools: + - snpeff: + description: | + SnpEff is a variant annotation and effect prediction tool. + It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). + homepage: https://pcingola.github.io/SnpEff/ + documentation: https://pcingola.github.io/SnpEff/se_introduction/ + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - vcf: + type: file + description: | + vcf to annotate + - db: + type: string + description: | + which db to annotate with + - cache: + type: file + description: | + path to snpEff cache (optional) +output: + - vcf: + type: file + description: | + annotated vcf + pattern: "*.ann.vcf" + - report: + type: file + description: snpEff report csv file + pattern: "*.csv" + - summary_html: + type: file + description: snpEff summary statistics in html file + pattern: "*.html" + - genes_txt: + type: file + description: txt (tab separated) file having counts of the number of variants affecting each transcript and gene + pattern: "*.genes.txt" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/snpeff/snpeff/tests/main.nf.test b/modules/nf-core/snpeff/snpeff/tests/main.nf.test new file mode 100644 index 00000000..dd37f275 --- /dev/null +++ b/modules/nf-core/snpeff/snpeff/tests/main.nf.test @@ -0,0 +1,50 @@ +nextflow_process { + + name "Test Process SNPEFF_SNPEFF" + script "../main.nf" + process "SNPEFF_SNPEFF" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "snpeff" + tag "snpeff/download" + tag "snpeff/snpeff" + + test("test_SNPEFF_SNPEFF") { + + setup { + run("SNPEFF_DOWNLOAD") { + script "../../download/main.nf" + process { + """ + input[0] = Channel.of([[id:params.snpeff_genome + '.' + params.snpeff_cache_version], params.snpeff_genome, params.snpeff_cache_version]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ]) + input[1] = params.snpeff_genome + '.' 
+ params.snpeff_cache_version + input[2] = SNPEFF_DOWNLOAD.out.cache + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.report[0][1]).exists() }, + { assert path(process.out.summary_html[0][1]).exists() }, + { assert path(process.out.vcf[0][1]).exists() }, + { assert snapshot(process.out.genes_txt).match("genes_txt") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} diff --git a/modules/nf-core/snpeff/snpeff/tests/main.nf.test.snap b/modules/nf-core/snpeff/snpeff/tests/main.nf.test.snap new file mode 100644 index 00000000..0891b844 --- /dev/null +++ b/modules/nf-core/snpeff/snpeff/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,25d44a118d558b331d51ec00be0d997c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, + "timestamp": "2024-03-18T17:37:18.879477" + }, + "genes_txt": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.genes.txt:md5,130536bf0237d7f3f746d32aaa32840a" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, + "timestamp": "2024-03-18T17:37:18.874822" + } +} \ No newline at end of file diff --git a/modules/nf-core/snpeff/snpeff/tests/nextflow.config b/modules/nf-core/snpeff/snpeff/tests/nextflow.config new file mode 100644 index 00000000..d31ebf6b --- /dev/null +++ b/modules/nf-core/snpeff/snpeff/tests/nextflow.config @@ -0,0 +1,4 @@ +params { + snpeff_cache_version = "105" + snpeff_genome = "WBcel235" +} diff --git a/modules/nf-core/snpeff/snpeff/tests/tags.yml b/modules/nf-core/snpeff/snpeff/tests/tags.yml new file mode 100644 index 00000000..427b588d --- /dev/null +++ b/modules/nf-core/snpeff/snpeff/tests/tags.yml @@ -0,0 +1,2 @@ +snpeff/snpeff: + - "modules/nf-core/snpeff/snpeff/**" diff --git a/modules/nf-core/star/align/environment.yml b/modules/nf-core/star/align/environment.yml new file mode 100644 index 00000000..8bd58cff --- /dev/null +++ 
b/modules/nf-core/star/align/environment.yml @@ -0,0 +1,10 @@ +name: star_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::star=2.7.10a + - bioconda::samtools=1.18 + - bioconda::htslib=1.18 + - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf new file mode 100644 index 00000000..8e9c48b1 --- /dev/null +++ b/modules/nf-core/star/align/main.nf @@ -0,0 +1,109 @@ +process STAR_ALIGN { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' : + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }" + + input: + tuple val(meta), path(reads, stageAs: "input*/*") + tuple val(meta2), path(index) + tuple val(meta3), path(gtf) + val star_ignore_sjdbgtf + val seq_platform + val seq_center + + output: + tuple val(meta), path('*Log.final.out') , emit: log_final + tuple val(meta), path('*Log.out') , emit: log_out + tuple val(meta), path('*Log.progress.out'), emit: log_progress + path "versions.yml" , emit: versions + + tuple val(meta), path('*d.out.bam') , optional:true, emit: bam + tuple val(meta), path('*sortedByCoord.out.bam') , optional:true, emit: bam_sorted + tuple val(meta), path('*toTranscriptome.out.bam'), optional:true, emit: bam_transcript + tuple val(meta), path('*Aligned.unsort.out.bam') , optional:true, emit: bam_unsorted + tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq + tuple val(meta), path('*.tab') , optional:true, emit: tab + tuple val(meta), path('*.SJ.out.tab') , optional:true, emit: spl_junc_tab + tuple val(meta), path('*.ReadsPerGene.out.tab') , optional:true, emit: read_per_gene_tab + tuple val(meta), 
path('*.out.junction') , optional:true, emit: junction + tuple val(meta), path('*.out.sam') , optional:true, emit: sam + tuple val(meta), path('*.wig') , optional:true, emit: wig + tuple val(meta), path('*.bg') , optional:true, emit: bedgraph + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reads1 = [], reads2 = [] + meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v } + def ignore_gtf = star_ignore_sjdbgtf ? '' : "--sjdbGTFfile $gtf" + def seq_platform = seq_platform ? "'PL:$seq_platform'" : "" + def seq_center = seq_center ? "'CN:$seq_center'" : "" + def attrRG = args.contains("--outSAMattrRGline") ? "" : "--outSAMattrRGline 'ID:$prefix' $seq_center 'SM:$prefix' $seq_platform" + def out_sam_type = (args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted' + def mv_unsorted_bam = (args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : '' + """ + STAR \\ + --genomeDir $index \\ + --readFilesIn ${reads1.join(",")} ${reads2.join(",")} \\ + --runThreadN $task.cpus \\ + --outFileNamePrefix $prefix. 
\\ + $out_sam_type \\ + $ignore_gtf \\ + $attrRG \\ + $args + + $mv_unsorted_bam + + if [ -f ${prefix}.Unmapped.out.mate1 ]; then + mv ${prefix}.Unmapped.out.mate1 ${prefix}.unmapped_1.fastq + gzip ${prefix}.unmapped_1.fastq + fi + if [ -f ${prefix}.Unmapped.out.mate2 ]; then + mv ${prefix}.Unmapped.out.mate2 ${prefix}.unmapped_2.fastq + gzip ${prefix}.unmapped_2.fastq + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}Xd.out.bam + touch ${prefix}.Log.final.out + touch ${prefix}.Log.out + touch ${prefix}.Log.progress.out + touch ${prefix}.sortedByCoord.out.bam + touch ${prefix}.toTranscriptome.out.bam + touch ${prefix}.Aligned.unsort.out.bam + touch ${prefix}.Aligned.sortedByCoord.out.bam + touch ${prefix}.unmapped_1.fastq.gz + touch ${prefix}.unmapped_2.fastq.gz + touch ${prefix}.tab + touch ${prefix}.SJ.out.tab + touch ${prefix}.ReadsPerGene.out.tab + touch ${prefix}.Chimeric.out.junction + touch ${prefix}.out.sam + touch ${prefix}.Signal.UniqueMultiple.str1.out.wig + touch ${prefix}.Signal.UniqueMultiple.str1.out.bg + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/star/align/meta.yml b/modules/nf-core/star/align/meta.yml new file mode 100644 index 00000000..e80dbb7d --- /dev/null +++ b/modules/nf-core/star/align/meta.yml @@ -0,0 +1,115 @@ +name: star_align +description: Align reads to a reference genome using STAR +keywords: + - align + - fasta + - genome + - 
reference +tools: + - star: + description: | + STAR is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/alexdobin/STAR + manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf + doi: 10.1093/bioinformatics/bts635 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - index: + type: directory + description: STAR genome index + pattern: "star" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - gtf: + type: file + description: Annotation GTF file + pattern: "*.{gtf}" + - star_ignore_sjdbgtf: + type: boolean + description: Ignore annotation GTF file + - seq_platform: + type: string + description: Sequencing platform + - seq_center: + type: string + description: Sequencing center +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - log_final: + type: file + description: STAR final log file + pattern: "*Log.final.out" + - log_out: + type: file + description: STAR lot out file + pattern: "*Log.out" + - log_progress: + type: file + description: STAR log progress file + pattern: "*Log.progress.out" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam_sorted: + type: file + description: Sorted BAM file of read alignments (optional) + pattern: "*sortedByCoord.out.bam" + - bam_transcript: + type: file + description: Output BAM file of transcriptome alignment (optional) + pattern: "*toTranscriptome.out.bam" + - bam_unsorted: + type: file + description: Unsorted 
BAM file of read alignments (optional) + pattern: "*Aligned.unsort.out.bam" + - fastq: + type: file + description: Unmapped FastQ files (optional) + pattern: "*fastq.gz" + - tab: + type: file + description: STAR output tab file(s) (optional) + pattern: "*.tab" + - junction: + type: file + description: STAR chimeric junction output file (optional) + pattern: "*.out.junction" + - wig: + type: file + description: STAR output wiggle format file(s) (optional) + pattern: "*.wig" + - bedgraph: + type: file + description: STAR output bedGraph format file(s) (optional) + pattern: "*.bg" +authors: + - "@kevinmenden" + - "@drpatelh" + - "@praveenraj2018" +maintainers: + - "@kevinmenden" + - "@drpatelh" + - "@praveenraj2018" diff --git a/modules/nf-core/star/align/tests/main.nf.test b/modules/nf-core/star/align/tests/main.nf.test new file mode 100644 index 00000000..6ecd7786 --- /dev/null +++ b/modules/nf-core/star/align/tests/main.nf.test @@ -0,0 +1,268 @@ +nextflow_process { + + name "Test Process STAR_ALIGN" + script "../main.nf" + process "STAR_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/align" + tag "star/genomegenerate" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + test("homo_sapiens - single_end") { + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ 
id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - single_end - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - single_end - log_out") }, + { assert snapshot(process.out.bam).match("homo_sapiens - single_end - bam") }, + { assert snapshot(process.out.bam_sorted).match("homo_sapiens - single_end - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - single_end - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - single_end - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - single_end - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - single_end - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - single_end - junction") }, + { assert snapshot(process.out.log_progress).match("homo_sapiens - single_end - log_progress") }, + { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - single_end - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - single_end - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - single_end - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - single_end - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - single_end - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - single_end - versions") } + ) + } + } + + test("homo_sapiens - paired_end") { + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - log_out") }, + { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - bam") }, + { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - junction") }, + { assert snapshot(process.out.log_progress).match("homo_sapiens - paired_end - log_progress") }, + { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - wig") }, + { assert 
snapshot(process.out.versions).match("homo_sapiens - paired_end - versions") } + ) + } + } + + test("homo_sapiens - paired_end - arriba") { + config "./nextflow.arriba.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - arriba - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - arriba - log_out") }, + { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - arriba - log_progress") }, + { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - arriba - bam") }, + { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - arriba - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - arriba - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - arriba - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - arriba - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - arriba - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - arriba - junction") }, + { assert 
snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - arriba - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - arriba - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - arriba - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - arriba - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - arriba - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - arriba - versions") } + ) + } + } + + test("homo_sapiens - paired_end - starfusion") { + config "./nextflow.starfusion.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_out") }, + { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - starfusion - log_progress") }, + { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - starfusion - bam") }, + { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - starfusion - bam_sorted") }, + { assert 
snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - starfusion - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - starfusion - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - starfusion - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - starfusion - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - starfusion - junction") }, + { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - starfusion - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - starfusion - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - starfusion - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - starfusion - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - starfusion - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - starfusion - versions") } + ) + } + } + + test("homo_sapiens - paired_end - multiple") { + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.log_final[0][1]).name).match("homo_sapiens - paired_end - multiple - log_final") }, + { assert snapshot(file(process.out.log_out[0][1]).name).match("homo_sapiens - paired_end - multiple - log_out") }, + { assert snapshot(file(process.out.log_progress[0][1]).name).match("homo_sapiens - paired_end - multiple - log_progress") }, + { assert snapshot(process.out.bam).match("homo_sapiens - paired_end - multiple - bam") }, + { assert snapshot(process.out.bam_sorted).match("homo_sapiens - paired_end - multiple - bam_sorted") }, + { assert snapshot(process.out.bam_transcript).match("homo_sapiens - paired_end - multiple - bam_transcript") }, + { assert snapshot(process.out.bam_unsorted).match("homo_sapiens - paired_end - multiple - bam_unsorted") }, + { assert snapshot(process.out.bedgraph).match("homo_sapiens - paired_end - multiple - bedgraph") }, + { assert snapshot(process.out.fastq).match("homo_sapiens - paired_end - multiple - fastq") }, + { assert snapshot(process.out.junction).match("homo_sapiens - paired_end - multiple - junction") }, + { assert snapshot(process.out.read_per_gene_tab).match("homo_sapiens - paired_end - multiple - read_per_gene_tab") }, + { assert snapshot(process.out.sam).match("homo_sapiens - paired_end - multiple - sam") }, + { assert snapshot(process.out.spl_junc_tab).match("homo_sapiens - paired_end - multiple - spl_junc_tab") }, + { assert snapshot(process.out.tab).match("homo_sapiens - paired_end - multiple - tab") }, + { assert snapshot(process.out.wig).match("homo_sapiens - paired_end - multiple - wig") }, + { assert snapshot(process.out.versions).match("homo_sapiens - paired_end - multiple - versions") } + ) + } + } +} \ No newline at end of file diff --git 
a/modules/nf-core/star/align/tests/main.nf.test.snap b/modules/nf-core/star/align/tests/main.nf.test.snap new file mode 100644 index 00000000..08edb914 --- /dev/null +++ b/modules/nf-core/star/align/tests/main.nf.test.snap @@ -0,0 +1,769 @@ +{ + "homo_sapiens - paired_end - multiple - bam_sorted": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a" + ] + ] + ], + "timestamp": "2023-12-04T18:01:19.968225733" + }, + "homo_sapiens - paired_end - multiple - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.857804" + }, + "homo_sapiens - paired_end - arriba - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" + ] + ] + ], + "timestamp": "2023-12-04T17:56:12.347549723" + }, + "homo_sapiens - single_end - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.24701" + }, + "homo_sapiens - paired_end - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.383818" + }, + "homo_sapiens - paired_end - arriba - versions": { + "content": [ + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "timestamp": "2023-12-04T17:56:12.431212643" + }, + "homo_sapiens - paired_end - multiple - bedgraph": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" + ] + ] + ] + ], + "timestamp": "2023-12-04T18:01:20.07119229" + }, + "homo_sapiens - paired_end - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.368841" + }, + "homo_sapiens - paired_end - arriba - bedgraph": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.102537" + }, + "homo_sapiens - single_end - junction": { + "content": [ + [ + + ] + ], + 
"timestamp": "2023-11-23T13:22:55.185369" + }, + "homo_sapiens - paired_end - arriba - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,5155c9fd1f787ad6d7d80987fb06219c" + ] + ] + ], + "timestamp": "2023-12-04T17:56:12.268388251" + }, + "homo_sapiens - single_end - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.216183" + }, + "homo_sapiens - paired_end - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.327236" + }, + "homo_sapiens - single_end - versions": { + "content": [ + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "timestamp": "2023-12-04T17:53:26.664210196" + }, + "homo_sapiens - paired_end - multiple - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:29:01.022176" + }, + "homo_sapiens - paired_end - arriba - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.15277" + }, + "homo_sapiens - paired_end - multiple - junction": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.52923" + }, + "homo_sapiens - paired_end - multiple - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" + ] + ] + ], + "timestamp": "2023-12-04T18:01:20.189486201" + }, + "homo_sapiens - paired_end - starfusion - log_final": { + "content": [ + "test.Log.final.out" + ], + "timestamp": "2023-11-23T13:27:55.905883" + }, + "homo_sapiens - paired_end - starfusion - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.192302" + }, + "homo_sapiens - paired_end - multiple - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.661837" + }, + "homo_sapiens - paired_end - multiple - log_final": { + "content": [ + "test.Log.final.out" + ], + "timestamp": "2023-11-23T13:29:00.966417" + }, + "homo_sapiens - paired_end - starfusion - bam": { + "content": [ + [ + [ 
+ { + "id": "test", + "single_end": false + }, + "test.Aligned.out.bam:md5,bcad07b838f6762fc01eea52b5cd3f84" + ] + ] + ], + "timestamp": "2023-12-04T17:59:58.53235164" + }, + "homo_sapiens - paired_end - arriba - junction": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.202776" + }, + "homo_sapiens - single_end - bedgraph": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test.Signal.Unique.str1.out.bg:md5,c56fc1472776fb927eaf62d973da5f9a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,e93373cf6f2a2a9506e2efdb260cdd4f" + ] + ] + ] + ], + "timestamp": "2023-12-04T17:53:26.394863748" + }, + "homo_sapiens - paired_end - arriba - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.251962" + }, + "homo_sapiens - paired_end - starfusion - bam_sorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.040843" + }, + "homo_sapiens - single_end - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.154172" + }, + "homo_sapiens - paired_end - bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f" + ] + ] + ], + "timestamp": "2023-12-04T17:54:11.934832258" + }, + "homo_sapiens - paired_end - arriba - bam_transcript": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:06.998817" + }, + "homo_sapiens - paired_end - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:23:33.259699" + }, + "homo_sapiens - paired_end - arriba - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:25:06.849451" + }, + "homo_sapiens - paired_end - multiple - versions": { + "content": [ + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "timestamp": "2023-12-04T18:01:20.393705142" + }, + "homo_sapiens - paired_end - starfusion - bam_transcript": { + "content": [ + [ + + ] + ], + 
"timestamp": "2023-11-23T13:27:56.082408" + }, + "homo_sapiens - paired_end - starfusion - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" + ] + ] + ], + "timestamp": "2023-12-04T17:59:58.818041322" + }, + "homo_sapiens - single_end - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.175307" + }, + "homo_sapiens - paired_end - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ] + ], + "timestamp": "2023-12-04T17:54:12.255481058" + }, + "homo_sapiens - paired_end - starfusion - bedgraph": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.155413" + }, + "homo_sapiens - single_end - bam_transcript": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.144852" + }, + "homo_sapiens - paired_end - versions": { + "content": [ + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "timestamp": "2023-12-04T17:54:12.343840482" + }, + "homo_sapiens - paired_end - multiple - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,069877e053714e23010fe4e1c003b4a2" + ] + ] + ], + "timestamp": "2023-12-04T18:01:20.291692062" + }, + "homo_sapiens - single_end - bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535" + ] + ] + ], + "timestamp": "2023-12-04T17:53:26.265642675" + }, + "homo_sapiens - paired_end - arriba - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.444214" + }, + "homo_sapiens - paired_end - log_progress": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Log.progress.out:md5,b2bd061d6cbaaf3d6d3b1fed547f69b8" + ] + ] + ], + "timestamp": "2023-12-04T17:54:12.126063825" + }, + "homo_sapiens - paired_end - arriba - 
log_final": { + "content": [ + "test.Log.final.out" + ], + "timestamp": "2023-11-23T13:25:06.829799" + }, + "homo_sapiens - paired_end - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.300509" + }, + "homo_sapiens - paired_end - arriba - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.300383" + }, + "homo_sapiens - paired_end - multiple - bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a" + ] + ] + ], + "timestamp": "2023-12-04T18:01:19.851247126" + }, + "homo_sapiens - paired_end - multiple - fastq": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.462257" + }, + "homo_sapiens - single_end - bam_sorted": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535" + ] + ] + ], + "timestamp": "2023-12-04T17:53:26.335457371" + }, + "homo_sapiens - paired_end - arriba - bam_sorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:06.94699" + }, + "homo_sapiens - paired_end - starfusion - junction": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Chimeric.out.junction:md5,c10ef219f4a30e83711b995bc5e40dba" + ] + ] + ], + "timestamp": "2023-12-04T17:59:58.641115828" + }, + "homo_sapiens - single_end - tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + ] + ] + ], + "timestamp": "2023-12-04T17:53:26.580593434" + }, + "homo_sapiens - paired_end - starfusion - versions": { + "content": [ + [ + "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + ] + ], + "timestamp": "2023-12-04T17:59:58.907317103" + }, + "homo_sapiens - paired_end - multiple - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.330463" + }, + "homo_sapiens - paired_end - arriba - 
log_progress": { + "content": [ + "test.Log.progress.out" + ], + "timestamp": "2023-11-23T13:25:06.86866" + }, + "homo_sapiens - paired_end - bedgraph": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.Signal.Unique.str1.out.bg:md5,d7bf8b70b436ca048a62513e1d0ece3a", + "test.Signal.UniqueMultiple.str1.out.bg:md5,686d58493b9eb445b56ace4d67f76ef6" + ] + ] + ] + ], + "timestamp": "2023-12-04T17:54:12.064121304" + }, + "homo_sapiens - paired_end - starfusion - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.118974" + }, + "homo_sapiens - paired_end - starfusion - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.264699" + }, + "homo_sapiens - paired_end - multiple - log_progress": { + "content": [ + "test.Log.progress.out" + ], + "timestamp": "2023-11-23T13:29:01.076947" + }, + "homo_sapiens - paired_end - arriba - bam_unsorted": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:25:07.050409" + }, + "homo_sapiens - paired_end - bam_sorted": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f" + ] + ] + ], + "timestamp": "2023-12-04T17:54:12.002180537" + }, + "homo_sapiens - single_end - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + ] + ] + ], + "timestamp": "2023-12-04T17:53:26.50932751" + }, + "homo_sapiens - paired_end - starfusion - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,19c3faa1bfa9a0cc5e4c45f17065b53a" + ] + ] + ], + "timestamp": "2023-12-04T17:59:58.731699486" + }, + "homo_sapiens - single_end - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:22:55.126286" + }, + "homo_sapiens - paired_end - log_final": { + "content": [ + "test.Log.final.out" + ], + 
"timestamp": "2023-11-23T13:23:33.253884" + }, + "homo_sapiens - single_end - log_final": { + "content": [ + "test.Log.final.out" + ], + "timestamp": "2023-11-23T13:22:55.11799" + }, + "homo_sapiens - paired_end - bam_transcript": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.287684" + }, + "homo_sapiens - paired_end - starfusion - log_progress": { + "content": [ + "test.Log.progress.out" + ], + "timestamp": "2023-11-23T13:27:55.971484" + }, + "homo_sapiens - paired_end - multiple - bam_transcript": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.264176" + }, + "homo_sapiens - paired_end - multiple - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:29:01.596406" + }, + "homo_sapiens - single_end - read_per_gene_tab": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:22:55.205936" + }, + "homo_sapiens - paired_end - junction": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.340653" + }, + "homo_sapiens - paired_end - spl_junc_tab": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ] + ], + "timestamp": "2023-12-04T17:54:12.185730856" + }, + "homo_sapiens - paired_end - starfusion - sam": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.300637" + }, + "homo_sapiens - paired_end - arriba - bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.out.bam:md5,c1b1747f5873f2d17762725636e891d5" + ] + ] + ], + "timestamp": "2023-12-04T17:56:12.190560178" + }, + "homo_sapiens - single_end - log_progress": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.Log.progress.out:md5,b2bd061d6cbaaf3d6d3b1fed547f69b8" + ] + ] + ], + "timestamp": "2023-12-04T17:53:26.450352138" + }, + "homo_sapiens - paired_end - starfusion - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:27:56.422018" + }, + 
"homo_sapiens - paired_end - wig": { + "content": [ + [ + + ] + ], + "timestamp": "2023-11-23T13:23:33.429457" + }, + "homo_sapiens - paired_end - starfusion - log_out": { + "content": [ + "test.Log.out" + ], + "timestamp": "2023-11-23T13:27:55.93945" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/align/tests/nextflow.arriba.config b/modules/nf-core/star/align/tests/nextflow.arriba.config new file mode 100644 index 00000000..2324b9e5 --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.arriba.config @@ -0,0 +1,14 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50' + } + +} + +// Fix chown issue for the output star folder +docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)' diff --git a/modules/nf-core/star/align/tests/nextflow.config b/modules/nf-core/star/align/tests/nextflow.config new file mode 100644 index 00000000..c4ac5808 --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.config @@ -0,0 +1,14 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --outWigType bedGraph --outWigStrand Unstranded' + } + +} + +// Fix chown issue for the output star folder +docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)' diff --git a/modules/nf-core/star/align/tests/nextflow.starfusion.config b/modules/nf-core/star/align/tests/nextflow.starfusion.config new file mode 
100644 index 00000000..467b6497 --- /dev/null +++ b/modules/nf-core/star/align/tests/nextflow.starfusion.config @@ -0,0 +1,14 @@ +process { + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outReadsUnmapped None --twopassMode Basic --outSAMstrandField intronMotif --outSAMunmapped Within --chimSegmentMin 12 --chimJunctionOverhangMin 8 --chimOutJunctionFormat 1 --alignSJDBoverhangMin 10 --alignMatesGapMax 100000 --alignIntronMax 100000 --alignSJstitchMismatchNmax 5 -1 5 5 --chimMultimapScoreRange 3 --chimScoreJunctionNonGTAG -4 --chimMultimapNmax 20 --chimNonchimScoreDropMin 10 --peOverlapNbasesMin 12 --peOverlapMMp 0.1 --alignInsertionFlush Right --alignSplicedMateMapLminOverLmate 0 --alignSplicedMateMapLmin 30' + } + +} + +// Fix chown issue for the output star folder +docker.runOptions = '--platform=linux/amd64 -u $(id -u):$(id -g)' diff --git a/modules/nf-core/star/align/tests/tags.yml b/modules/nf-core/star/align/tests/tags.yml new file mode 100644 index 00000000..8beace16 --- /dev/null +++ b/modules/nf-core/star/align/tests/tags.yml @@ -0,0 +1,2 @@ +star/align: + - modules/nf-core/star/align/** diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml new file mode 100644 index 00000000..791f255e --- /dev/null +++ b/modules/nf-core/star/genomegenerate/environment.yml @@ -0,0 +1,10 @@ +name: star_genomegenerate +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.18 + - bioconda::htslib=1.18 + - bioconda::star=2.7.10a + - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf new file mode 100644 index 00000000..b8855715 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/main.nf @@ -0,0 +1,119 @@ +process STAR_GENOMEGENERATE { + tag "$fasta" + label 'process_high' + + 
conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' : + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(gtf) + + output: + tuple val(meta), path("star") , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args_list = args.tokenize() + def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def include_gtf = gtf ? "--sjdbGTFfile $gtf" : '' + if (args_list.contains('--genomeSAindexNbases')) { + """ + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + $include_gtf \\ + --runThreadN $task.cpus \\ + $memory \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + samtools faidx $fasta + NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fasta}.fai` + + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + $include_gtf \\ + --runThreadN $task.cpus \\ + --genomeSAindexNbases \$NUM_BASES \\ + $memory \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + 
gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } + + stub: + if (gtf) { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/exonGeTrInfo.tab + touch star/exonInfo.tab + touch star/geneInfo.tab + touch star/genomeParameters.txt + touch star/sjdbInfo.txt + touch star/sjdbList.fromGTF.out.tab + touch star/sjdbList.out.tab + touch star/transcriptInfo.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } else { + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/genomeParameters.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml new file mode 100644 index 00000000..1061e1b8 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/meta.yml @@ -0,0 +1,53 @@ +name: star_genomegenerate +description: Create index for STAR +keywords: + - index + - fasta + - genome + - reference +tools: + - star: + description: | + STAR is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. 
+ homepage: https://github.com/alexdobin/STAR + manual: https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf + doi: 10.1093/bioinformatics/bts635 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Fasta file of the reference genome + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - gtf: + type: file + description: GTF file of the reference genome +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - index: + type: directory + description: Folder containing the star index files + pattern: "star" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test new file mode 100644 index 00000000..c17c8ba4 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test @@ -0,0 +1,115 @@ +nextflow_process { + + name "Test Process STAR_GENOMEGENERATE" + script "../main.nf" + process "STAR_GENOMEGENERATE" + tag "modules" + tag "modules_nfcore" + tag "star" + tag "star/genomegenerate" + + test("fasta_gtf") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() 
}.sort().toString()).match("fasta_gtf_index") }, + { assert snapshot(process.out.versions).match("fasta_gtf_versions") } + ) + } + } + + test("fasta_gtf_stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_gtf_stub_index") }, + { assert snapshot(process.out.versions).match("fasta_gtf_stub_versions") } + ) + } + } + + test("fasta") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_index") }, + { assert snapshot(process.out.versions).match("fasta_versions") } + ) + } + + } + + test("fasta_stub") { + + options '-stub' + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ [], [] ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_stub_index") }, + { assert snapshot(process.out.versions).match("fasta_stub_versions") } + ) + } + + } + +} diff --git 
a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap new file mode 100644 index 00000000..5653d6e6 --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap @@ -0,0 +1,90 @@ +{ + "fasta_gtf_versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:31.798555" + }, + "fasta_stub_versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:55:07.521209" + }, + "fasta_gtf_stub_index": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:46.478098" + }, + "fasta_gtf_stub_versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:46.491657" + }, + "fasta_index": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:57.552329" + }, + "fasta_versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:57.560541" + }, + "fasta_gtf_index": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, 
geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:31.786814" + }, + "fasta_stub_index": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:55:07.517472" + } +} \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/tests/tags.yml b/modules/nf-core/star/genomegenerate/tests/tags.yml new file mode 100644 index 00000000..79f619bf --- /dev/null +++ b/modules/nf-core/star/genomegenerate/tests/tags.yml @@ -0,0 +1,2 @@ +star/genomegenerate: + - modules/nf-core/star/genomegenerate/** diff --git a/modules/nf-core/tabix/bgziptabix/environment.yml b/modules/nf-core/tabix/bgziptabix/environment.yml new file mode 100644 index 00000000..c4235872 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/environment.yml @@ -0,0 +1,8 @@ +name: tabix_bgziptabix +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::tabix=1.11 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/tabix/bgziptabix/main.nf b/modules/nf-core/tabix/bgziptabix/main.nf new file mode 100644 index 00000000..bcdcf2a6 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/main.nf @@ -0,0 +1,47 @@ +process TABIX_BGZIPTABIX { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/htslib:1.19.1--h81da01d_1' : + 'biocontainers/htslib:1.19.1--h81da01d_1' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.gz"), path("*.tbi"), optional: true, emit: gz_tbi + tuple val(meta), path("*.gz"), path("*.csi"), optional: true, emit: gz_csi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + bgzip --threads ${task.cpus} -c $args $input > ${prefix}.${input.getExtension()}.gz + tabix $args2 ${prefix}.${input.getExtension()}.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.${input.getExtension()}.gz + touch ${prefix}.${input.getExtension()}.gz.tbi + touch ${prefix}.${input.getExtension()}.gz.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tabix/bgziptabix/meta.yml b/modules/nf-core/tabix/bgziptabix/meta.yml new file mode 100644 index 00000000..438aba4d --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/meta.yml @@ -0,0 +1,53 @@ +name: tabix_bgziptabix +description: bgzip a sorted tab-delimited genome file and then create tabix index +keywords: + - bgzip + - compress + - index + - tabix + - vcf +tools: + - tabix: + description: Generic indexer for TAB-delimited genome position files. + homepage: https://www.htslib.org/doc/tabix.html + documentation: https://www.htslib.org/doc/tabix.1.html + doi: 10.1093/bioinformatics/btq671 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - tab: + type: file + description: TAB-delimited genome position file + pattern: "*.{bed,gff,sam,vcf}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - gz: + type: file + description: Output compressed file + pattern: "*.{gz}" + - tbi: + type: file + description: tabix index file + pattern: "*.{gz.tbi}" + - csi: + type: file + description: tabix alternate index file + pattern: "*.{gz.csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" + - "@DLBPointon" +maintainers: + - "@maxulysse" + - "@DLBPointon" diff --git a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test new file mode 100644 index 00000000..87ea2c84 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test @@ -0,0 +1,94 @@ +nextflow_process { + + name "Test Process TABIX_BGZIPTABIX" + script "modules/nf-core/tabix/bgziptabix/main.nf" + process "TABIX_BGZIPTABIX" + + tag "modules" + tag "modules_nfcore" + tag "tabix" + tag "tabix/bgziptabix" + + test("sarscov2_bed_tbi") { + config "./tabix_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'tbi_test' ], + [ file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.gz_tbi[0][1]).name + ).match("tbi_test") + } + ) + } + } + + test("sarscov2_bed_csi") { + config "./tabix_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'csi_test' ], + [ file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.gz_csi[0][1]).name + 
).match("csi_test") + } + ) + } + + } + + test("sarscov2_bed_csi_stub") { + config "./tabix_csi.config" + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + [ file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.gz_csi[0][1]).name + ).match("csi_stub") + } + ) + } + + } + +} diff --git a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap new file mode 100644 index 00000000..fcecb2e4 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap @@ -0,0 +1,143 @@ +{ + "sarscov2_bed_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_test" + }, + "tbi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", + "tbi_test.bed.gz.tbi:md5,24908545311cf2b7c803c41d716872c4" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc" + ], + "gz_csi": [ + + ], + "gz_tbi": [ + [ + { + "id": "tbi_test" + }, + "tbi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", + "tbi_test.bed.gz.tbi:md5,24908545311cf2b7c803c41d716872c4" + ] + ], + "versions": [ + "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc" + ] + } + ], + "timestamp": "2024-02-19T14:50:51.513838" + }, + "sarscov2_bed_csi": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "csi_test" + }, + "csi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", + "csi_test.bed.gz.csi:md5,e06165ddd34640783728cb07f2558b43" + ] + ], + "2": [ + "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc" + ], + "gz_csi": [ + [ + { + "id": "csi_test" + }, + "csi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", + "csi_test.bed.gz.csi:md5,e06165ddd34640783728cb07f2558b43" + ] + ], + "gz_tbi": [ + + ], + "versions": [ + "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc" + ] + } + ], + "timestamp": 
"2024-02-19T14:51:00.513777" + }, + "csi_test": { + "content": [ + "csi_test.bed.gz" + ], + "timestamp": "2024-02-19T14:51:00.548801" + }, + "csi_stub": { + "content": [ + "test.bed.gz" + ], + "timestamp": "2024-02-19T14:51:09.218454" + }, + "tbi_test": { + "content": [ + "tbi_test.bed.gz" + ], + "timestamp": "2024-02-19T14:50:51.579654" + }, + "sarscov2_bed_csi_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc" + ], + "gz_csi": [ + [ + { + "id": "test" + }, + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gz_tbi": [ + [ + { + "id": "test" + }, + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc" + ] + } + ], + "timestamp": "2024-02-19T14:51:09.164254" + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config b/modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config new file mode 100644 index 00000000..fb41a314 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIPTABIX { + ext.args2 = '-p vcf --csi' + } +} diff --git a/modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config b/modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config new file mode 100644 index 00000000..c1915dc4 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIPTABIX { + ext.args2 = '-p vcf' + } +} \ No newline at end of file diff --git 
a/modules/nf-core/tabix/bgziptabix/tests/tags.yml b/modules/nf-core/tabix/bgziptabix/tests/tags.yml new file mode 100644 index 00000000..5052b4d7 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tests/tags.yml @@ -0,0 +1,2 @@ +tabix/bgziptabix: + - "modules/nf-core/tabix/bgziptabix/**" diff --git a/modules/nf-core/tabix/tabix/environment.yml b/modules/nf-core/tabix/tabix/environment.yml new file mode 100644 index 00000000..76b45e16 --- /dev/null +++ b/modules/nf-core/tabix/tabix/environment.yml @@ -0,0 +1,8 @@ +name: tabix_tabix +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::tabix=1.11 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf new file mode 100644 index 00000000..1737141d --- /dev/null +++ b/modules/nf-core/tabix/tabix/main.nf @@ -0,0 +1,42 @@ +process TABIX_TABIX { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/htslib:1.19.1--h81da01d_1' : + 'biocontainers/htslib:1.19.1--h81da01d_1' }" + + input: + tuple val(meta), path(tab) + + output: + tuple val(meta), path("*.tbi"), optional:true, emit: tbi + tuple val(meta), path("*.csi"), optional:true, emit: csi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + tabix $args $tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${tab}.tbi + touch ${tab}.csi + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml new file mode 100644 index 00000000..ae5b4f43 --- /dev/null +++ b/modules/nf-core/tabix/tabix/meta.yml @@ -0,0 +1,49 @@ +name: tabix_tabix +description: create tabix index from a sorted bgzip tab-delimited genome file +keywords: + - index + - tabix + - vcf +tools: + - tabix: + description: Generic indexer for TAB-delimited genome position files. + homepage: https://www.htslib.org/doc/tabix.html + documentation: https://www.htslib.org/doc/tabix.1.html + doi: 10.1093/bioinformatics/btq671 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tab: + type: file + description: TAB-delimited genome position file compressed with bgzip + pattern: "*.{bed.gz,gff.gz,sam.gz,vcf.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - tbi: + type: file + description: tabix index file + pattern: "*.{tbi}" + - csi: + type: file + description: coordinate sorted index file + pattern: "*.{csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test new file mode 100644 index 00000000..3a150c70 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -0,0 +1,142 @@ +nextflow_process { + + name "Test Process TABIX_TABIX" + script "modules/nf-core/tabix/tabix/main.nf" + process "TABIX_TABIX" + + tag "modules" + tag "modules_nfcore" + tag "tabix" + tag "tabix/tabix" + + test("sarscov2_bedgz_tbi") { + config "./tabix_bed.config" + when { + process { + """ + input[0] = [ + [ id:'tbi_bed' ], + [ file(params.test_data['sarscov2']['genome']['test_bed_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.tbi[0][1]).name + ).match("tbi_bed") + } + ) + } + } + + test("sarscov2_gff_tbi") { + config "./tabix_gff.config" + when { + process { + """ + input[0] = [ + [ id:'tbi_gff' ], + [ file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.tbi[0][1]).name + ).match("tbi_gff") + } + ) + } + + } + + test("sarscov2_vcf_tbi") { + config "./tabix_vcf_tbi.config" + when { + process { + """ + input[0] = [ + [ id:'tbi_vcf' ], + [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert 
process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.tbi[0][1]).name + ).match("tbi_vcf") + } + ) + } + + } + + test("sarscov2_vcf_csi") { + config "./tabix_vcf_csi.config" + when { + process { + """ + input[0] = [ + [ id:'vcf_csi' ], + [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.csi[0][1]).name + ).match("vcf_csi") + } + ) + } + + } + + test("sarscov2_vcf_csi_stub") { + config "./tabix_vcf_csi.config" + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'vcf_csi_stub' ], + [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.csi[0][1]).name + ).match("vcf_csi_stub") + } + ) + } + + } + +} diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap new file mode 100644 index 00000000..034e38b6 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap @@ -0,0 +1,217 @@ +{ + "vcf_csi_stub": { + "content": [ + "test.vcf.gz.csi" + ], + "timestamp": "2024-03-04T14:51:59.788002" + }, + "tbi_gff": { + "content": [ + "genome.gff3.gz.tbi" + ], + "timestamp": "2024-02-19T14:53:37.420216" + }, + "sarscov2_gff_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_gff" + }, + "genome.gff3.gz.tbi:md5,53fc683fd217aae47ef10d23c52a9178" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "tbi_gff" + }, + "genome.gff3.gz.tbi:md5,53fc683fd217aae47ef10d23c52a9178" + ] + ], + "versions": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ] + } + ], + "timestamp": 
"2024-02-19T14:53:37.388157" + }, + "sarscov2_bedgz_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_bed" + }, + "test.bed.gz.tbi:md5,0f17d85e7f0a042b2aa367b70df224f8" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "tbi_bed" + }, + "test.bed.gz.tbi:md5,0f17d85e7f0a042b2aa367b70df224f8" + ] + ], + "versions": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ] + } + ], + "timestamp": "2024-02-19T14:53:28.879408" + }, + "tbi_vcf": { + "content": [ + "test.vcf.gz.tbi" + ], + "timestamp": "2024-02-19T14:53:46.402522" + }, + "vcf_csi": { + "content": [ + "test.vcf.gz.csi" + ], + "timestamp": "2024-02-19T14:53:54.921189" + }, + "sarscov2_vcf_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_vcf" + }, + "test.vcf.gz.tbi:md5,897f3f378a811b90e6dee56ce08d2bcf" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "tbi_vcf" + }, + "test.vcf.gz.tbi:md5,897f3f378a811b90e6dee56ce08d2bcf" + ] + ], + "versions": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ] + } + ], + "timestamp": "2024-02-19T14:53:46.370358" + }, + "sarscov2_vcf_csi_stub": { + "content": [ + { + "0": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,3d45df6d80883bad358631069a2940fd" + ], + "csi": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,3d45df6d80883bad358631069a2940fd" + ] + } + ], + "timestamp": "2024-03-04T14:51:59.766184" + }, + "sarscov2_vcf_csi": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + 
"id": "vcf_csi" + }, + "test.vcf.gz.csi:md5,0731ad6f40104d2bbb1a2cc478ef8f03" + ] + ], + "2": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ], + "csi": [ + [ + { + "id": "vcf_csi" + }, + "test.vcf.gz.csi:md5,0731ad6f40104d2bbb1a2cc478ef8f03" + ] + ], + "tbi": [ + + ], + "versions": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ] + } + ], + "timestamp": "2024-02-19T14:53:54.886876" + }, + "tbi_bed": { + "content": [ + "test.bed.gz.tbi" + ], + "timestamp": "2024-02-19T14:53:28.947628" + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tabix_bed.config b/modules/nf-core/tabix/tabix/tests/tabix_bed.config new file mode 100644 index 00000000..7ff05905 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_bed.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p bed' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tabix_gff.config b/modules/nf-core/tabix/tabix/tests/tabix_gff.config new file mode 100644 index 00000000..20c0a1e3 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_gff.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p gff' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tabix_vcf_csi.config b/modules/nf-core/tabix/tabix/tests/tabix_vcf_csi.config new file mode 100644 index 00000000..eb4f2d7e --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_vcf_csi.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p vcf --csi' + } +} diff --git a/modules/nf-core/tabix/tabix/tests/tabix_vcf_tbi.config b/modules/nf-core/tabix/tabix/tests/tabix_vcf_tbi.config new file mode 100644 index 00000000..2774c8a9 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_vcf_tbi.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p vcf' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tags.yml 
b/modules/nf-core/tabix/tabix/tests/tags.yml new file mode 100644 index 00000000..6eda0653 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tags.yml @@ -0,0 +1,2 @@ +tabix/tabix: + - "modules/nf-core/tabix/tabix/**" diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 00000000..0c9cbb10 --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,11 @@ +name: untar + +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.7 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 00000000..8a75bb95 --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,63 @@ +process UNTAR { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: ( meta.id ? 
"${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir $prefix + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C $prefix --strip-components 1 \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir $prefix + touch ${prefix}/file.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml new file mode 100644 index 00000000..a9a2110f --- /dev/null +++ b/modules/nf-core/untar/meta.yml @@ -0,0 +1,46 @@ +name: untar +description: Extract files. +keywords: + - untar + - uncompress + - extract +tools: + - untar: + description: | + Extract tar.gz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - untar: + type: directory + description: Directory containing contents of archive + pattern: "*/" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test new file mode 100644 index 00000000..2a7c97bf --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test @@ -0,0 +1,47 @@ +nextflow_process { + + name "Test Process UNTAR" + script "../main.nf" + process "UNTAR" + tag "modules" + tag "modules_nfcore" + tag "untar" + test("test_untar") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.untar).match("test_untar") }, + ) + } + + } + + test("test_untar_onlyfiles") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.untar).match("test_untar_onlyfiles") }, + ) + } + + } + +} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap new file mode 100644 index 00000000..64550292 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -0,0 +1,42 @@ +{ + "test_untar_onlyfiles": { + "content": [ + [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T11:49:41.320643" + }, + "test_untar": { + "content": [ + [ + [ + [ + + ], + [ + 
"hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T11:49:33.795172" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/tests/tags.yml b/modules/nf-core/untar/tests/tags.yml new file mode 100644 index 00000000..feb6f15c --- /dev/null +++ b/modules/nf-core/untar/tests/tags.yml @@ -0,0 +1,2 @@ +untar: + - modules/nf-core/untar/** diff --git a/nextflow.config b/nextflow.config index 0758a8f9..03a582ed 100644 --- a/nextflow.config +++ b/nextflow.config @@ -8,15 +8,83 @@ // Global default params, used in configs params { - - // TODO nf-core: Specify your pipeline's command line flags // Input options - input = null + input = null // sample sheet // References - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes/' - igenomes_ignore = false + genome = 'GRCh38' + igenomes_base = 's3://ngi-igenomes/igenomes/' + igenomes_ignore = false + save_reference = false + save_merged_fastq = false + feature_type = 'exon' + + // Sequence read information + read_length = 150 // Required for STAR to build index and align reads + + // Alignment + aligner = 'star' // Only STAR is currently supported. 
+ star_twopass = true + star_ignore_sjdbgtf = false // Ignore GTF file while creating index or alignment by STAR + star_max_memory_bamsort = 0 // STAR parameter limitBAMsortRAM to specify maximum RAM for sorting BAM + star_bins_bamsort = 50 // STAR parameter outBAMsortingBinsN to specify number of bins for sorting BAM + star_max_collapsed_junc = 1000000 // STAR parameter limitOutSJcollapsed to specify max number of collapsed junctions + seq_center = null + seq_platform = 'illumina' // Required for preparing for BAM headers for GATK to work + bam_csi_index = false + save_unaligned = false + save_align_intermeds = false + + // Preprocessing of alignment + remove_duplicates = false + + // Variant calling + no_intervals = false + + // Variant annotation + annotate_tools = null // List of annotation tools to run - snpeff or vep or merge + download_cache = false // Do not download annotation cache + dbnsfp = null // No dbnsfp processed file + dbnsfp_consequence = null // No default consequence for dbnsfp plugin + dbnsfp_fields = "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF" // Default fields for dbnsfp plugin + dbnsfp_tbi = null // No dbnsfp processed file index + outdir_cache = null // No default outdir cache + snpeff_cache = 's3://annotation-cache/snpeff_cache/' + spliceai_indel = null // No spliceai_indel file + spliceai_indel_tbi = null // No spliceai_indel file index + spliceai_snv = null // No spliceai_snv file + spliceai_snv_tbi = null // No spliceai_snv file index + use_annotation_cache_keys = false + vep_cache = 's3://annotation-cache/vep_cache/' + vep_custom_args = "--everything --filter_common --per_gene --total_length --offline --format vcf" // Default arguments for VEP + vep_dbnsfp = null // dbnsfp plugin disabled within VEP + vep_include_fasta = false // Don't use fasta file for annotation with VEP + vep_loftee = null // loftee plugin disabled within VEP + vep_out_format = "vcf" + vep_spliceai = null // 
spliceai plugin disabled within VEP + vep_spliceregion = null // spliceregion plugin disabled within VEP + + // Skip steps + skip_baserecalibration = false + skip_intervallisttools = false + skip_variantfiltration = false + skip_variantannotation = false + + // GATK intervallist parameters + gatk_interval_scatter_count = 25 + + // GATK haplotypecaller parameters + gatk_hc_call_conf = 20 + generate_gvcf = false + + //GATK variant filter parameters + gatk_vf_window_size = 35 + gatk_vf_cluster_size = 3 + gatk_vf_fs_filter = 30.0 + gatk_vf_qd_filter = 2.0 + + // QC + skip_multiqc = false // MultiQC options multiqc_config = null @@ -37,7 +105,8 @@ params { help_full = false show_hidden = false version = false - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data' // Config options config_profile_name = null @@ -162,8 +231,7 @@ profiles { includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" // Load nf-core/rnavar custom profiles from different institutions. -// TODO nf-core: Optionally, you can add a pipeline-specific nf-core config at https://github.com/nf-core/configs -// includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/rnavar.config" : "/dev/null" +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? 
"${params.custom_config_base}/pipeline/rnavar.config" : "/dev/null" // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled @@ -227,7 +295,7 @@ manifest { mainScript = 'main.nf' nextflowVersion = '!>=24.04.2' version = '2.0.0dev' - doi = '' + doi = '10.5281/zenodo.6669636' } // Nextflow plugins @@ -268,3 +336,5 @@ validation { // Load modules.config for DSL2 module specific options includeConfig 'conf/modules.config' +includeConfig 'conf/modules/annotate.config' +includeConfig 'conf/modules/prepare_cache.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index 1c875df1..5fc8d5a3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -13,15 +13,15 @@ "required": ["input", "outdir"], "properties": { "input": { + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "A design file with information about the samples in your experiment. Use this parameter to specify the location of the input files. It has to be a comma-separated file with a header row. See [usage docs](https://nf-co.re/rnavar/usage#input).", + "fa_icon": "fas fa-file-csv", "type": "string", "format": "file-path", "exists": true, "schema": "assets/schema_input.json", "mimetype": "text/csv", - "pattern": "^\\S+\\.csv$", - "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. 
See [usage docs](https://nf-co.re/rnavar/usage#samplesheet-input).", - "fa_icon": "fas fa-file-csv" + "pattern": "^\\S+\\.csv$" }, "outdir": { "type": "string", @@ -40,6 +40,10 @@ "type": "string", "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", "fa_icon": "fas fa-file-signature" + }, + "save_merged_fastq": { + "type": "boolean", + "description": "Save FastQ files after merging re-sequenced libraries in the results directory." } } }, @@ -65,6 +69,100 @@ "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", "fa_icon": "far fa-file-code" }, + "dict": { + "type": "string", + "fa_icon": "fas fa-file", + "description": "Path to FASTA dictionary file.", + "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference." + }, + "fasta_fai": { + "type": "string", + "fa_icon": "fas fa-file", + "help_text": "> **NB** If none provided, will be generated automatically from the FASTA reference", + "description": "Path to FASTA reference index." + }, + "gtf": { + "type": "string", + "description": "Path to GTF annotation file.", + "help_text": "This parameter is *mandatory* if `--genome` is not specified." + }, + "gff": { + "type": "string", + "description": "Path to GFF3 annotation file.", + "help_text": "This parameter must be specified if `--genome` or `--gtf` are not specified." + }, + "exon_bed": { + "type": "string", + "description": "Path to BED file containing exon intervals. This will be created from the GTF file if not specified." + }, + "read_length": { + "type": "number", + "default": 150, + "description": "Read length", + "help_text": "Specify the read length for the STAR aligner." 
+ }, + "save_reference": { + "type": "boolean", + "help_text": "If the STAR index is generated by the pipeline, then please use this parameter to save it to your results folder. These index can then be used for future pipeline runs, reducing processing times.", + "description": "If generated by the pipeline, save the STAR index in the results directory." + }, + "known_indels": { + "type": "string", + "description": "Path to known indels VCF file" + }, + "known_indels_tbi": { + "type": "string", + "description": "Path to known indels index file" + }, + "dbsnp": { + "type": "string", + "description": "Path to dbSNP VCF file" + }, + "dbsnp_tbi": { + "type": "string", + "description": "Path to dbSNP VCF index file" + }, + "snpeff_db": { + "type": "string", + "fa_icon": "fas fa-database", + "description": "snpEff DB version.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the database to be use to annotate with.\nAlternatively databases' names can be listed with the `snpEff databases`." + }, + "snpeff_genome": { + "type": "string", + "fa_icon": "fas fa-microscope", + "description": "snpEff genome.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the genome when using the container with pre-downloaded cache." + }, + "vep_genome": { + "type": "string", + "fa_icon": "fas fa-microscope", + "description": "VEP genome.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the genome when using the container with pre-downloaded cache." + }, + "vep_species": { + "type": "string", + "fa_icon": "fas fa-microscope", + "description": "VEP species.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nAlternatively species listed in Ensembl Genomes caches can be used." 
+ }, + "vep_cache_version": { + "type": "string", + "fa_icon": "fas fa-tag", + "description": "VEP cache version.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nAlternatively cache version can be use to specify the correct Ensembl Genomes version number as these differ from the concurrent Ensembl/VEP version numbers" + }, + "feature_type": { + "type": "string", + "description": "Type of feature to parse from annotation file", + "help_text": "This parameter value can be exon, transcript or gene. Default exon" + }, + "download_cache": { + "type": "boolean", + "fa_icon": "fas fa-download", + "description": "Download annotation cache.", + "help_text": "Set this parameter, if you wish to download annotation cache." + }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", @@ -82,6 +180,343 @@ } } }, + "alignment_options": { + "title": "Alignment options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define parameters related to read alignment", + "properties": { + "aligner": { + "type": "string", + "default": "star", + "description": "Specifies the alignment algorithm to use. Currently available option is 'star'", + "help_text": "This parameter define which aligner is to be used for aligning the RNA reads to the reference genome. Currently only STAR aligner is supported. So use 'star' as the value for this option." + }, + "star_index": { + "type": "string", + "description": "Path to STAR index folder or compressed file (tar.gz)", + "help_text": "This parameter can be used if there is an pre-defined STAR index available. You can either give the full path to the index directory or a compressed file in tar.gz format." + }, + "star_twopass": { + "type": "boolean", + "description": "Enable STAR 2-pass mapping mode.", + "help_text": "This parameter enables STAR to perform 2-pass mapping. Default true." 
+ }, + "star_ignore_sjdbgtf": { + "type": "boolean", + "description": "Do not use GTF file during STAR index building step", + "help_text": "Do not use parameter --sjdbGTFfile during the STAR genomeGenerate process." + }, + "star_max_memory_bamsort": { + "type": "integer", + "default": 0, + "description": "Option to limit RAM when sorting BAM file. Value to be specified in bytes. If 0, will be set to the genome index size.", + "help_text": "This parameter specifies the maximum available RAM (bytes) for sorting BAM during STAR alignment." + }, + "star_bins_bamsort": { + "type": "integer", + "default": 50, + "description": "Specifies the number of genome bins for coordinate-sorting", + "help_text": "This parameter specifies the number of bins to be used for coordinate sorting during STAR alignment step." + }, + "star_max_collapsed_junc": { + "type": "integer", + "default": 1000000, + "description": "Specifies the maximum number of collapsed junctions" + }, + "seq_center": { + "type": "string", + "description": "Sequencing center information to be added to read group of BAM files.", + "help_text": "This parameter is required for creating a proper BAM header to use in the downstream analysis of GATK. " + }, + "seq_platform": { + "type": "string", + "default": "illumina", + "description": "Specify the sequencing platform used", + "help_text": "This parameter is required for creating a proper BAM header to use in the downstream analysis of GATK. " + }, + "save_unaligned": { + "type": "boolean", + "description": "Where possible, save unaligned reads from aligner to the results directory.", + "help_text": "This may either be in the form of FastQ or BAM files depending on the options available for that particular tool." + }, + "save_align_intermeds": { + "type": "boolean", + "description": "Save the intermediate BAM files from the alignment step.", + "help_text": "By default, intermediate BAM files will not be saved. 
The final BAM files created after the appropriate filtering step are always saved to limit storage usage. Set this parameter to also save other intermediate BAM files." + }, + "bam_csi_index": { + "type": "boolean", + "description": "Create a CSI index for BAM files instead of the traditional BAI index. This will be required for genomes with larger chromosome sizes." + } + } + }, + "preprocessing": { + "title": "Preprocessing of alignment", + "type": "object", + "description": "", + "default": "", + "fa_icon": "fas fa-toolbox", + "properties": { + "remove_duplicates": { + "type": "boolean", + "fa_icon": "fas fa-hammer", + "description": "Specify whether to remove duplicates from the BAM during Picard MarkDuplicates step.", + "help_text": "Specify true for removing duplicates from BAM file during Picard MarkDuplicates step.", + "hidden": false + } + } + }, + "variant_calling": { + "title": "Variant calling", + "type": "object", + "description": "", + "default": "", + "fa_icon": "fas fa-toolbox", + "properties": { + "gatk_hc_call_conf": { + "type": "number", + "default": 20, + "fa_icon": "fas fa-hammer", + "description": "The minimum phred-scaled confidence threshold at which variants should be called.", + "help_text": "Specify the minimum phred-scaled confidence threshold at which variants should be called.", + "hidden": false + }, + "generate_gvcf": { + "type": "boolean", + "description": "Enable generation of GVCFs by sample in addition to the VCFs.", + "help_text": "This parameter enables GATK HAPLOTYPECALLER to generate GVCFs. Default false." + } + } + }, + "variant_annotation": { + "title": "Variant Annotation", + "type": "object", + "description": "", + "default": "", + "fa_icon": "fas fa-toolbox", + "properties": { + "annotate_tools": { + "type": "string", + "fa_icon": "fas fa-hammer", + "description": "Specify which tools RNAvar should use for annotating variants. Values can be 'snpeff', 'vep' or 'merge'. 
If you specify 'merge', the pipeline runs both snpeff and VEP annotation.", + "help_text": "List of tools to be used for variant annotation.", + "pattern": "^((snpeff|vep|merge)*(,)*)*$", + "hidden": true + }, + "vep_cache": { + "type": "string", + "fa_icon": "fas fa-file", + "default": "s3://annotation-cache/vep_cache/", + "description": "Path to VEP cache.", + "help_text": "Path to VEP cache which should contain the relevant species, genome and build directories at the path ${vep_species}/${vep_genome}_${vep_cache_version}" + }, + "snpeff_cache": { + "type": "string", + "fa_icon": "fas fa-file", + "default": "s3://annotation-cache/snpeff_cache/", + "description": "Path to snpEff cache.", + "help_text": "Path to snpEff cache which should contain the relevant genome and build directory in the path ${snpeff_species}.${snpeff_version}" + }, + "vep_include_fasta": { + "type": "boolean", + "fa_icon": "fas fa-file", + "description": "Allow usage of fasta file for annotation with VEP", + "hidden": true, + "help_text": "By pointing VEP to a FASTA file, it is possible to retrieve reference sequence locally. This enables VEP to retrieve HGVS notations (--hgvs), check the reference sequence given in input data, and construct transcript models from a GFF or GTF file without accessing a database.\n\nFor details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_cache.html#fasta)." + }, + "vep_dbnsfp": { + "type": "boolean", + "fa_icon": "fas fa-database", + "description": "Enable the use of the VEP dbNSFP plugin.", + "hidden": true, + "help_text": "For details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#dbnsfp)." 
+ }, + "dbnsfp": { + "type": "string", + "fa_icon": "fas fa-file", + "description": "Path to dbNSFP processed file.", + "help_text": "To be used with `--vep_dbnsfp`.\ndbNSFP files and more information are available at https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#dbnsfp and https://sites.google.com/site/jpopgen/dbNSFP/", + "hidden": true + }, + "dbnsfp_tbi": { + "type": "string", + "fa_icon": "fas fa-file", + "description": "Path to dbNSFP tabix indexed file.", + "help_text": "To be used with `--vep_dbnsfp`.", + "hidden": true + }, + "dbnsfp_consequence": { + "type": "string", + "fa_icon": "fas fa-arrow-alt-circle-right", + "description": "Consequence to annotate with", + "help_text": "To be used with `--vep_dbnsfp`.\nThis params is used to filter/limit outputs to a specific effect of the variant.\nThe set of consequence terms is defined by the Sequence Ontology and an overview of those used in VEP can be found here: https://www.ensembl.org/info/genome/variation/prediction/predicted_data.html\nIf one wants to filter using several consequences, then separate those by using '&' (i.e. 'consequence=3_prime_UTR_variant&intron_variant'.", + "hidden": true + }, + "dbnsfp_fields": { + "type": "string", + "fa_icon": "fas fa-border-all", + "description": "Fields to annotate with", + "default": "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF", + "help_text": "To be used with `--vep_dbnsfp`.\nThis params can be used to retrieve individual values from the dbNSFP file. The values correspond to the name of the columns in the dbNSFP file and are separated by comma.\nThe column names might differ between the different dbNSFP versions. Please check the Readme.txt file, which is provided with the dbNSFP file, to obtain the correct column names. 
The Readme file contains also a short description of the provided values and the version of the tools used to generate them.\n\nDefault value are explained below:\n\nrs_dbSNP - rs number from dbSNP\nHGVSc_VEP - HGVS coding variant presentation from VEP. Multiple entries separated by ';', corresponds to Ensembl_transcriptid\nHGVSp_VEP - HGVS protein variant presentation from VEP. Multiple entries separated by ';', corresponds to Ensembl_proteinid\n1000Gp3_EAS_AF - Alternative allele frequency in the 1000Gp3 East Asian descendent samples\n1000Gp3_AMR_AF - Alternative allele counts in the 1000Gp3 American descendent samples\nLRT_score - Original LRT two-sided p-value (LRTori), ranges from 0 to 1\nGERP++_RS - Conservation score. The larger the score, the more conserved the site, ranges from -12.3 to 6.17\ngnomAD_exomes_AF - Alternative allele frequency in the whole gnomAD exome samples.", + "hidden": true + }, + "vep_loftee": { + "type": "boolean", + "fa_icon": "fas fa-database", + "description": "Enable the use of the VEP LOFTEE plugin.", + "hidden": true, + "help_text": "For details, see [here](https://github.com/konradjk/loftee)." + }, + "vep_spliceai": { + "type": "boolean", + "fa_icon": "fas fa-database", + "description": "Enable the use of the VEP SpliceAI plugin.", + "hidden": true, + "help_text": "For details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#spliceai)." 
+ }, + "spliceai_snv": { + "type": "string", + "fa_icon": "fas fa-file", + "description": "Path to spliceai raw scores snv file.", + "help_text": "To be used with `--vep_spliceai`.", + "hidden": true + }, + "spliceai_snv_tbi": { + "type": "string", + "fa_icon": "fas fa-file", + "description": "Path to spliceai raw scores snv tabix indexed file.", + "help_text": "To be used with `--vep_spliceai`.", + "hidden": true + }, + "spliceai_indel": { + "type": "string", + "fa_icon": "fas fa-file", + "description": "Path to spliceai raw scores indel file.", + "help_text": "To be used with `--vep_spliceai`.", + "hidden": true + }, + "spliceai_indel_tbi": { + "type": "string", + "fa_icon": "fas fa-file", + "description": "Path to spliceai raw scores indel tabix indexed file.", + "help_text": "To be used with `--vep_spliceai`.", + "hidden": true + }, + "vep_spliceregion": { + "type": "boolean", + "fa_icon": "fas fa-database", + "description": "Enable the use of the VEP SpliceRegion plugin.", + "hidden": true, + "help_text": "For details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#spliceregion) and [here](https://www.ensembl.info/2018/10/26/cool-stuff-the-vep-can-do-splice-site-variant-annotation/)." + }, + "vep_custom_args": { + "type": "string", + "default": "--everything --filter_common --per_gene --total_length --offline --format vcf", + "fa_icon": "fas fa-toolbox", + "description": "Add an extra custom argument to VEP.", + "help_text": "Using this params you can add custom args to VEP." + }, + "use_annotation_cache_keys": { + "type": "boolean", + "fa_icon": "fas fa-toolbox", + "description": "Use annotation cache keys for snpeff_cache and vep_cache.\nOnly when using annotation-cache or a similar structure.\nSee [here](https://annotation-cache.github.io/) for more information.", + "hidden": true + }, + "outdir_cache": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the cache will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open", + "hidden": true + }, + "vep_out_format": { + "type": "string", + "default": "vcf", + "description": "VEP output-file format.", + "enum": ["json", "tab", "vcf"], + "help_text": "Sets the format of the output-file from VEP. Available formats: json, tab and vcf.", + "fa_icon": "fas fa-table", + "hidden": true + } + } + }, + "pipeline_stage_options": { + "title": "Pipeline stage options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define parameters that control the stages in the pipeline", + "properties": { + "skip_baserecalibration": { + "type": "boolean", + "description": "Skip the process of base recalibration steps i.e., GATK BaseRecalibrator and GATK ApplyBQSR.", + "help_text": "This parameter disable the base recalibration step, thus using a un-calibrated BAM file for variant calling." + }, + "skip_intervallisttools": { + "type": "boolean", + "description": "Skip the process of preparing interval lists for the GATK variant calling step", + "help_text": "This parameter disable preparing multiple interval lists to use with HaplotypeCaller module of GATK. It is recommended not to disable the step as it is required to run the variant calling correctly." + }, + "skip_variantfiltration": { + "type": "boolean", + "description": "Skip variant filtering of GATK", + "help_text": "Set this parameter if you don't want to filter any variants." + }, + "skip_variantannotation": { + "type": "boolean", + "description": "Skip variant annotation", + "help_text": "Set this parameter if you don't want to run variant annotation." 
+ }, + "skip_multiqc": { + "type": "boolean", + "description": "Skip MultiQC reports", + "help_text": "This parameter disables all QC reports" + } + } + }, + "tool_parameters": { + "title": "Tool parameters", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define parameters of the tools used in the pipeline", + "properties": { + "gatk_interval_scatter_count": { + "type": "integer", + "default": 25, + "description": "Number of times the gene interval list to be split in order to run GATK haplotype caller in parallel", + "help_text": "Set this parameter to decide the number of splits for the gene interval list file." + }, + "no_intervals": { + "type": "boolean", + "description": "Do not use gene interval file during variant calling", + "help_text": "This parameter, if set to True, does not use the gene intervals during the variant calling step, which then results in variants from all regions including non-genic. Default is False" + }, + "gatk_vf_window_size": { + "type": "integer", + "default": 35, + "description": "The window size (in bases) in which to evaluate clustered SNPs.", + "help_text": "This parameter is used by GATK variant filtration step. It defines the window size (in bases) in which to evaluate clustered SNPs. It has to be used together with the other option 'cluster'." + }, + "gatk_vf_cluster_size": { + "type": "integer", + "default": 3, + "description": "The number of SNPs which make up a cluster. Must be at least 2.", + "help_text": "This parameter is used by GATK variant filtration step. It defines the number of SNPs which make up a cluster within a window. Must be at least 2." + }, + "gatk_vf_fs_filter": { + "type": "number", + "default": 30.0, + "description": "Value to be used for the FisherStrand (FS) filter", + "help_text": "This parameter defines the value to use for the FisherStrand (FS) filter in the GATK variant-filtering step. \nThe value should be given in a float number format. 
Default is 30.0" + }, + "gatk_vf_qd_filter": { + "type": "number", + "default": 2.0, + "description": "Value to be used for the QualByDepth (QD) filter", + "help_text": "This parameter defines the value to use for the QualByDepth (QD) filter in the GATK variant-filtering step. \nThe value should given in a float number format. Default is 2.0" + } + } + }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -212,11 +647,18 @@ "fa_icon": "fas fa-check-square", "hidden": true }, + "modules_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data", + "hidden": true + }, "pipelines_testdata_base_path": { "type": "string", "fa_icon": "far fa-check-circle", "description": "Base URL or local path to location of pipeline test dataset files", - "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data", "hidden": true } } @@ -234,6 +676,25 @@ }, { "$ref": "#/$defs/generic_options" + }, + { + "$ref": "#/$defs/tool_parameters" + }, + { + "$ref": "#/$defs/pipeline_stage_options" + }, + { + "$ref": "#/$defs/alignment_options" + }, + { + "$ref": "#/$defs/variant_calling" + }, + { + "$ref": "#/$defs/variant_annotation" + }, + { + "$ref": "#/$defs/preprocessing" } - ] + ], + "required": ["aligner", "seq_platform"] } diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 00000000..69ef7315 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,19 @@ +config { + // location for all nf-tests + testsDir "." 
+ + // nf-test directory including temporary files for each test + workDir ".nf-test" + + // location of an optional nextflow.config file specific for executing tests + configFile "conf/test.config" + + // run all test with defined profile(s) from the main nextflow.config + profile "test" + + // Include plugins + plugins { + load "nft-bam@0.4.0" + load "nft-utils@0.0.3" + } +} diff --git a/subworkflows/local/annotation_cache_initialisation/main.nf b/subworkflows/local/annotation_cache_initialisation/main.nf new file mode 100644 index 00000000..6e35a68d --- /dev/null +++ b/subworkflows/local/annotation_cache_initialisation/main.nf @@ -0,0 +1,57 @@ +// +// ANNOTATION CACHE INITIALISATION +// + +// Initialise channels based on params or indices that were just built +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run +// Condition is based on params.step and params.tools +// If an extra condition exists, it's specified in comments + +workflow ANNOTATION_CACHE_INITIALISATION { + take: + snpeff_enabled + snpeff_cache + snpeff_genome + snpeff_db + vep_enabled + vep_cache + vep_species + vep_cache_version + vep_genome + help_message + + main: + if (snpeff_enabled) { + def snpeff_annotation_cache_key = (snpeff_cache == "s3://annotation-cache/snpeff_cache/") ? 
"${snpeff_genome}.${snpeff_db}/" : "" + def snpeff_cache_dir = "${snpeff_annotation_cache_key}${snpeff_genome}.${snpeff_db}" + def snpeff_cache_path_full = file("$snpeff_cache/$snpeff_cache_dir", type: 'dir') + if ( !snpeff_cache_path_full.exists() || !snpeff_cache_path_full.isDirectory() ) { + if (snpeff_cache == "s3://annotation-cache/snpeff_cache/") { + error("This path is not available within annotation-cache.\nPlease check https://annotation-cache.github.io/ to create a request for it.") + } else { + error("Path provided with SnpEff cache is invalid.\nMake sure there is a directory named ${snpeff_cache_dir} in ${snpeff_cache}.\n${help_message}") + } + } + snpeff_cache = Channel.fromPath(file("${snpeff_cache}/${snpeff_annotation_cache_key}"), checkIfExists: true).collect() + .map{ cache -> [ [ id:"${snpeff_genome}.${snpeff_db}" ], cache ] } + } else snpeff_cache = [] + + if (vep_enabled) { + def vep_annotation_cache_key = (vep_cache == "s3://annotation-cache/vep_cache/") ? "${vep_cache_version}_${vep_genome}/" : "" + def vep_cache_dir = "${vep_annotation_cache_key}${vep_species}/${vep_cache_version}_${vep_genome}" + def vep_cache_path_full = file("$vep_cache/$vep_cache_dir", type: 'dir') + if ( !vep_cache_path_full.exists() || !vep_cache_path_full.isDirectory() ) { + if (vep_cache == "s3://annotation-cache/vep_cache/") { + error("This path is not available within annotation-cache.\nPlease check https://annotation-cache.github.io/ to create a request for it.") + } else { + error("Path provided with VEP cache is invalid.\nMake sure there is a directory named ${vep_cache_dir} in ${vep_cache}.\n${help_message}") + } + } + ensemblvep_cache = Channel.fromPath(file("${vep_cache}/${vep_annotation_cache_key}"), checkIfExists: true).collect() + } else ensemblvep_cache = [] + + emit: + ensemblvep_cache // channel: [ cache ] + snpeff_cache // channel: [ meta, cache ] +} diff --git a/subworkflows/local/cram_qc_mosdepth_samtools/main.nf 
b/subworkflows/local/cram_qc_mosdepth_samtools/main.nf new file mode 100644 index 00000000..e2b51418 --- /dev/null +++ b/subworkflows/local/cram_qc_mosdepth_samtools/main.nf @@ -0,0 +1,38 @@ +// +// QC on CRAM +// +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { MOSDEPTH } from '../../../modules/nf-core/mosdepth/main' + +workflow CRAM_QC_MOSDEPTH_SAMTOOLS { + take: + cram // channel: [mandatory] [ meta, cram, crai ] + fasta // channel: [mandatory] [ fasta ] + intervals + + main: + versions = Channel.empty() + reports = Channel.empty() + + // Reports run on cram + SAMTOOLS_STATS(cram, fasta) + + MOSDEPTH(cram.combine(intervals.map{ meta, bed -> [ bed ?: [] ] }), fasta) + + // Gather all reports generated + reports = reports.mix(SAMTOOLS_STATS.out.stats) + reports = reports.mix(MOSDEPTH.out.global_txt) + reports = reports.mix(MOSDEPTH.out.regions_txt) + + // Gather versions of all tools used + versions = versions.mix(MOSDEPTH.out.versions) + versions = versions.mix(SAMTOOLS_STATS.out.versions.first()) + + emit: + reports + + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/download_cache_snpeff_vep/main.nf b/subworkflows/local/download_cache_snpeff_vep/main.nf new file mode 100644 index 00000000..f9f776db --- /dev/null +++ b/subworkflows/local/download_cache_snpeff_vep/main.nf @@ -0,0 +1,34 @@ +// +// DOWNLOAD CACHE SNPEFF VEP +// + +// Initialize channels based on params or indices that were just built +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run +// Condition is based on params.step and params.tools +// If and extra condition exists, it's specified in comments + +include { ENSEMBLVEP_DOWNLOAD } from '../../../modules/nf-core/ensemblvep/download/main' +include { SNPEFF_DOWNLOAD } 
from '../../../modules/nf-core/snpeff/download/main' + +workflow DOWNLOAD_CACHE_SNPEFF_VEP { + take: + ensemblvep_info + snpeff_info + + main: + versions = Channel.empty() + + ENSEMBLVEP_DOWNLOAD(ensemblvep_info) + SNPEFF_DOWNLOAD(snpeff_info) + + // Gather versions of all tools used + versions = versions.mix(ENSEMBLVEP_DOWNLOAD.out.versions) + versions = versions.mix(SNPEFF_DOWNLOAD.out.versions) + + emit: + ensemblvep_cache = ENSEMBLVEP_DOWNLOAD.out.cache.collect() // channel: [ meta, cache ] + snpeff_cache = SNPEFF_DOWNLOAD.out.cache.collect() // channel: [ meta, cache ] + + versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf new file mode 100755 index 00000000..a568e1a8 --- /dev/null +++ b/subworkflows/local/prepare_genome/main.nf @@ -0,0 +1,80 @@ +// +// Prepare reference genome files +// + +include { BEDTOOLS_MERGE } from '../../../modules/nf-core/bedtools/merge/main' +include { BEDTOOLS_SORT } from '../../../modules/nf-core/bedtools/sort/main' +include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary/main' +include { GFFREAD } from '../../../modules/nf-core/gffread/main' +include { GTF2BED } from '../../../modules/local/gtf2bed' +include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' +include { STAR_GENOMEGENERATE } from '../../../modules/nf-core/star/genomegenerate/main' +include { GUNZIP as GUNZIP_FASTA } from '../../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_GTF } from '../../../modules/nf-core/gunzip/main' +include { TABIX_TABIX as TABIX_DBSNP } from '../../../modules/nf-core/tabix/tabix/main' +include { TABIX_TABIX as TABIX_KNOWN_INDELS } from '../../../modules/nf-core/tabix/tabix/main' + +workflow PREPARE_GENOME { + take: + ch_fasta_raw // file: /path/to/genome.fasta + ch_gff // file: /path/to/genome.gff + ch_gtf_raw // file: /path/to/genome.gtf + ch_dbsnp + ch_known_indels + 
feature_type + + main: + ch_versions = Channel.empty() + + //Unzip reference genome files if needed + + if (params.fasta.endsWith('.gz')) { + GUNZIP_FASTA(ch_fasta_raw) + + ch_fasta = GUNZIP_FASTA.out.gunzip + + } else { + ch_fasta = ch_fasta_raw + } + + if (params.gtf.endsWith('.gz')) { + GUNZIP_GTF(ch_gtf_raw) + + ch_gtf = GUNZIP_GTF.out.gunzip + + } else { + ch_gtf = ch_gtf_raw + } + + GATK4_CREATESEQUENCEDICTIONARY(ch_fasta) + GFFREAD(ch_gff, ch_fasta) + SAMTOOLS_FAIDX(ch_fasta, [['id':'genome'], []]) + TABIX_DBSNP(ch_dbsnp.flatten().map{ it -> [ [ id:it.baseName ], it ] }) + TABIX_KNOWN_INDELS(ch_known_indels.flatten().map{ it -> [ [ id:it.baseName ], it ] } ) + + ch_gtf = ch_gtf.mix(GFFREAD.out.gtf) + + GTF2BED(ch_gtf, feature_type) + STAR_GENOMEGENERATE(ch_fasta, ch_gtf) + + ch_versions = ch_versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) + ch_versions = ch_versions.mix(GFFREAD.out.versions) + ch_versions = ch_versions.mix(GTF2BED.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) + ch_versions = ch_versions.mix(TABIX_DBSNP.out.versions) + ch_versions = ch_versions.mix(TABIX_KNOWN_INDELS.out.versions) + + emit: + dict = GATK4_CREATESEQUENCEDICTIONARY.out.dict // path: genome.fasta.dict + exon_bed = GTF2BED.out.bed.map{ bed -> [ [ id:bed.baseName ], bed ] }.collect() // path: exon.bed + fasta = ch_fasta + fasta_fai = SAMTOOLS_FAIDX.out.fai.map{ meta, fai -> [fai] } // path: genome.fasta.fai + gtf = ch_gtf // path: genome.gtf + star_index = STAR_GENOMEGENERATE.out.index // path: star/index/ + dbsnp_tbi = TABIX_DBSNP.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: dbsnb.vcf.gz.tbi + known_indels_tbi = TABIX_KNOWN_INDELS.out.tbi.map{ meta, tbi -> [tbi] }.collect() // path: {known_indels*}.vcf.gz.tbi + versions = ch_versions // channel: [ versions.yml ] + // bedtools_sort = ch_bedtools_sort // path: sort.bed + // bedtools_merge = ch_bedtools_merge // path: 
merge.bed +} diff --git a/subworkflows/local/recalibrate/main.nf b/subworkflows/local/recalibrate/main.nf new file mode 100644 index 00000000..bbf80e1b --- /dev/null +++ b/subworkflows/local/recalibrate/main.nf @@ -0,0 +1,64 @@ +/* +======================================================================================== + RECALIBRATE +======================================================================================== +*/ + +include { GATK4_APPLYBQSR as APPLYBQSR } from '../../../modules/nf-core/gatk4/applybqsr/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' + +workflow RECALIBRATE { + take: + skip_samtools // boolean: true/false + bam // channel: [mandatory] bam + dict // channel: [mandatory] dict + fai // channel: [mandatory] fai + fasta // channel: [mandatory] fasta + + main: + + ch_versions = Channel.empty() + + bam_recalibrated_index = Channel.empty() + bam_recalibrated = Channel.empty() + bam_reports = Channel.empty() + + APPLYBQSR ( + bam, + fasta, + fai, + dict + ) + bam_recalibrated = APPLYBQSR.out.bam + ch_versions = ch_versions.mix(APPLYBQSR.out.versions.first()) + + SAMTOOLS_INDEX(bam_recalibrated) + + bam_recalibrated_index = bam_recalibrated + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .map{meta, bam, bai, csi -> + if (bai) [meta, bam, bai] + else [meta, bam, csi] + } + + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + samtools_stats = Channel.empty() + + if (!skip_samtools) { + SAMTOOLS_STATS(bam_recalibrated_index, [[], []]) + samtools_stats = SAMTOOLS_STATS.out.stats + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) + } + bam_reports = samtools_stats + + + emit: + bam = bam_recalibrated_index + qc = bam_reports + + versions = ch_versions + +} diff --git a/subworkflows/local/splitncigar/main.nf 
b/subworkflows/local/splitncigar/main.nf new file mode 100644 index 00000000..91524e64 --- /dev/null +++ b/subworkflows/local/splitncigar/main.nf @@ -0,0 +1,54 @@ +// +// Subworkflow: Run GATK4 SplitNCigarReads with intervals, merge and index BAM file. +// + +include { GATK4_SPLITNCIGARREADS } from '../../../modules/nf-core/gatk4/splitncigarreads/main' +include { SAMTOOLS_MERGE } from '../../../modules/nf-core/samtools/merge/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' + +workflow SPLITNCIGAR { + take: + bam // channel: [ val(meta), [ bam ], [bai] ] + ch_fasta // channel: [ fasta ] + ch_fai // channel: [ fai ] + ch_dict // channel: [ dict ] + intervals // channel: [ interval_list] + + main: + ch_versions = Channel.empty() + + bam_interval = bam.combine(intervals).map{ meta, bam, bai, intervals -> [ meta + [sample:meta.id], bam, bai, intervals ] } + + GATK4_SPLITNCIGARREADS(bam_interval, + ch_fasta, + ch_fai.map{ fai -> [[id:'genome'], fai] }, + ch_dict) + + bam_splitncigar = GATK4_SPLITNCIGARREADS.out.bam + ch_versions = ch_versions.mix(GATK4_SPLITNCIGARREADS.out.versions) + + bam_splitncigar_interval = bam_splitncigar.map{ meta, bam -> [ meta + [id:meta.sample] - meta.subMap('sample'), bam ] }.groupTuple() + + SAMTOOLS_MERGE(bam_splitncigar_interval, + ch_fasta, + ch_fai.map{ fai -> [[id:fai.baseName], fai] }) + + splitncigar_bam = SAMTOOLS_MERGE.out.bam + ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions) + + SAMTOOLS_INDEX(splitncigar_bam) + + splitncigar_bam_bai = splitncigar_bam + .join(SAMTOOLS_INDEX.out.bai, remainder: true) + .join(SAMTOOLS_INDEX.out.csi, remainder: true) + .map{meta, bam, bai, csi -> + if (bai) [meta, bam, bai] + else [meta, bam, csi] + } + + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) + + emit: + bam_bai = splitncigar_bam_bai + versions = ch_versions +} diff --git a/subworkflows/local/utils_nfcore_rnavar_pipeline/main.nf 
b/subworkflows/local/utils_nfcore_rnavar_pipeline/main.nf index 385cfe0b..431e7286 100644 --- a/subworkflows/local/utils_nfcore_rnavar_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_rnavar_pipeline/main.nf @@ -68,29 +68,34 @@ workflow PIPELINE_INITIALISATION { // validateInputParameters() + // Check input path parameters to see if they exist + def checkPathParamList = [ + params.dbsnp, + params.dbsnp_tbi, + params.dict, + params.fasta, + params.fasta_fai, + params.gff, + params.gtf, + params.input, + params.known_indels, + params.known_indels_tbi, + params.star_index + ] + + // only check if we are using the annotate_tools + if (params.annotate_tools && (params.annotate_tools.split(',').contains('snpeff') || params.annotate_tools.split(',').contains('merge'))) checkPathParamList.add(params.snpeff_cache) + if (params.annotate_tools && (params.annotate_tools.split(',').contains('vep') || params.annotate_tools.split(',').contains('merge'))) checkPathParamList.add(params.vep_cache) + // // Create channel from input file provided through params.input // - Channel - .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) - .map { - meta, fastq_1, fastq_2 -> - if (!fastq_2) { - return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] - } else { - return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] - } - } - .groupTuple() - .map { samplesheet -> - validateInputSamplesheet(samplesheet) + ch_samplesheet = Channel.fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + .map{ meta, fastq_1, fastq_2 -> + if (!fastq_2) return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] + else return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] } - .map { - meta, fastqs -> - return [ meta, fastqs.flatten() ] - } - .set { ch_samplesheet } emit: samplesheet = ch_samplesheet @@ -170,16 +175,26 @@ def validateInputSamplesheet(input) { return [ metas[0], fastqs ] } + // -// Get attribute 
from genome config file e.g. fasta +// Function to check samples are internally consistent after being grouped // -def getGenomeAttribute(attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } +def checkSamplesAfterGrouping(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same strandedness + def strandedness_ok = metas.collect{ it.strandedness }.unique().size == 1 + if (!strandedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must have the same strandedness!: ${metas[0].id}") } - return null + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. 
single-end or paired-end: ${metas[0].id}") + } + + return [ metas[0], fastqs ] } // diff --git a/subworkflows/local/vcf_annotate_all/main.nf b/subworkflows/local/vcf_annotate_all/main.nf new file mode 100644 index 00000000..20cab4be --- /dev/null +++ b/subworkflows/local/vcf_annotate_all/main.nf @@ -0,0 +1,75 @@ +// +// ANNOTATION +// + +include { VCF_ANNOTATE_BCFTOOLS } from '../vcf_annotate_bcftools/main' +include { VCF_ANNOTATE_ENSEMBLVEP } from '../../nf-core/vcf_annotate_ensemblvep/main' +include { VCF_ANNOTATE_ENSEMBLVEP as VCF_ANNOTATE_MERGE } from '../../nf-core/vcf_annotate_ensemblvep/main' +include { VCF_ANNOTATE_SNPEFF } from '../../nf-core/vcf_annotate_snpeff/main' + +workflow VCF_ANNOTATE_ALL { + take: + vcf // channel: [ val(meta), vcf ] + fasta + tools // Mandatory, list of tools to apply + snpeff_db + snpeff_cache + vep_genome + vep_species + vep_cache_version + vep_cache + vep_extra_files + bcftools_annotations + bcftools_annotations_index + bcftools_header_lines + + main: + reports = Channel.empty() + vcf_ann = Channel.empty() + tab_ann = Channel.empty() + json_ann = Channel.empty() + versions = Channel.empty() + + if (tools.split(',').contains('bcfann')) { + VCF_ANNOTATE_BCFTOOLS(vcf, bcftools_annotations, bcftools_annotations_index, bcftools_header_lines) + + vcf_ann = vcf_ann.mix(VCF_ANNOTATE_BCFTOOLS.out.vcf_tbi) + versions = versions.mix(VCF_ANNOTATE_BCFTOOLS.out.versions) + } + + + if (tools.split(',').contains('merge') || tools.split(',').contains('snpeff')) { + VCF_ANNOTATE_SNPEFF(vcf, snpeff_db, snpeff_cache) + + reports = reports.mix(VCF_ANNOTATE_SNPEFF.out.reports.map{ meta, reports -> [ reports ] }) + vcf_ann = vcf_ann.mix(VCF_ANNOTATE_SNPEFF.out.vcf_tbi) + versions = versions.mix(VCF_ANNOTATE_SNPEFF.out.versions) + } + + if (tools.split(',').contains('merge')) { + vcf_ann_for_merge = VCF_ANNOTATE_SNPEFF.out.vcf_tbi.map{ meta, vcf, tbi -> [ meta, vcf, [] ] } + VCF_ANNOTATE_MERGE(vcf_ann_for_merge, fasta, vep_genome, vep_species, 
vep_cache_version, vep_cache, vep_extra_files) + + reports = reports.mix(VCF_ANNOTATE_MERGE.out.reports) + vcf_ann = vcf_ann.mix(VCF_ANNOTATE_MERGE.out.vcf_tbi) + versions = versions.mix(VCF_ANNOTATE_MERGE.out.versions) + } + + if (tools.split(',').contains('vep')) { + vcf_for_vep = vcf.map{ meta, vcf -> [ meta, vcf, [] ] } + VCF_ANNOTATE_ENSEMBLVEP(vcf_for_vep, fasta, vep_genome, vep_species, vep_cache_version, vep_cache, vep_extra_files) + + reports = reports.mix(VCF_ANNOTATE_ENSEMBLVEP.out.reports) + vcf_ann = vcf_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.vcf_tbi) + tab_ann = tab_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.tab) + json_ann = json_ann.mix(VCF_ANNOTATE_ENSEMBLVEP.out.json) + versions = versions.mix(VCF_ANNOTATE_ENSEMBLVEP.out.versions) + } + + emit: + vcf_ann // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] + tab_ann + json_ann + reports // path: *.html + versions // path: versions.yml +} diff --git a/subworkflows/local/vcf_annotate_bcftools/main.nf b/subworkflows/local/vcf_annotate_bcftools/main.nf new file mode 100644 index 00000000..f616af8c --- /dev/null +++ b/subworkflows/local/vcf_annotate_bcftools/main.nf @@ -0,0 +1,31 @@ + +// +// Run BCFtools to annotate VCF files +// + +include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate/main' +include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' + +workflow VCF_ANNOTATE_BCFTOOLS { + take: + vcf // channel: [ val(meta), vcf ] + annotations // + annotations_index // + header_lines // + + main: + ch_versions = Channel.empty() + + BCFTOOLS_ANNOTATE(vcf, annotations, annotations_index, header_lines) + TABIX_TABIX(BCFTOOLS_ANNOTATE.out.vcf) + + ch_vcf_tbi = BCFTOOLS_ANNOTATE.out.vcf.join(TABIX_TABIX.out.tbi, failOnDuplicate: true, failOnMismatch: true) + + // Gather versions of all tools used + ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions) + ch_versions = ch_versions.mix(TABIX_TABIX.out.versions) + + emit: + vcf_tbi = ch_vcf_tbi // channel: [ val(meta), vcf.gz, 
vcf.gz.tbi ] + versions = ch_versions // path: versions.yml +} diff --git a/subworkflows/nf-core/bam_markduplicates_picard/main.nf b/subworkflows/nf-core/bam_markduplicates_picard/main.nf new file mode 100644 index 00000000..2de059b8 --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/main.nf @@ -0,0 +1,54 @@ +// +// Picard MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats +// + +include { PICARD_MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' + +workflow BAM_MARKDUPLICATES_PICARD { + + take: + ch_reads // channel: [ val(meta), path(reads) ] + ch_fasta // channel: [ path(fasta) ] + ch_fai // channel: [ path(fai) ] + + main: + + ch_versions = Channel.empty() + + PICARD_MARKDUPLICATES ( ch_reads, ch_fasta, ch_fai ) + ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions.first()) + + ch_markdup = PICARD_MARKDUPLICATES.out.bam.mix(PICARD_MARKDUPLICATES.out.cram) + + SAMTOOLS_INDEX ( ch_markdup ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + ch_reads_index = ch_markdup + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.crai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .map{meta, reads, bai, crai, csi -> + if (bai) [ meta, reads, bai ] + else if (crai) [ meta, reads, crai ] + else [ meta, reads, csi ] + } + + BAM_STATS_SAMTOOLS ( ch_reads_index, ch_fasta ) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), path(bam) ] + cram = PICARD_MARKDUPLICATES.out.cram // channel: [ val(meta), path(cram) ] + metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), path(metrics) ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] + crai = 
SAMTOOLS_INDEX.out.crai // channel: [ val(meta), path(crai) ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), path(csi) ] + + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_markduplicates_picard/meta.yml b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml new file mode 100644 index 00000000..433d35b2 --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml @@ -0,0 +1,71 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "bam_markduplicates_picard" +description: Picard MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats +keywords: + - markduplicates + - bam + - sam + - cram +components: + - picard/markduplicates + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_stats_samtools +input: + - ch_reads: + description: | + Sequence reads in BAM/CRAM/SAM format + Structure: [ val(meta), path(reads) ] + - ch_fasta: + description: | + Reference genome fasta file required for CRAM input + Structure: [ path(fasta) ] + - ch_fasta: + description: | + Index of the reference genome fasta file + Structure: [ path(fai) ] +output: + - bam: + description: | + processed BAM/SAM file + Structure: [ val(meta), path(bam) ] + - bai: + description: | + BAM/SAM samtools index + Structure: [ val(meta), path(bai) ] + - cram: + description: | + processed CRAM file + Structure: [ val(meta), path(cram) ] + - crai: + description: | + CRAM samtools index + Structure: [ val(meta), path(crai) ] + - csi: + description: | + CSI samtools index + Structure: [ val(meta), path(csi) ] + - stats: + description: | + File containing samtools 
stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats) ] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@dmarron" + - "@drpatelh" +maintainers: + - "@dmarron" + - "@drpatelh" diff --git a/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test new file mode 100644 index 00000000..5ef337dc --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test @@ -0,0 +1,93 @@ +nextflow_workflow { + + name "Test Workflow BAM_MARKDUPLICATES_PICARD" + script "../main.nf" + workflow "BAM_MARKDUPLICATES_PICARD" + + tag "picard" + tag "picard/markduplicates" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "bam_markduplicates_picard" + tag "subworkflows/bam_markduplicates_picard" + tag "subworkflows/bam_stats_samtools" + tag "bam_stats_samtools" + tag "samtools" + tag "samtools/flagstat" + tag "samtools/idxstats" + tag "samtools/index" + tag "samtools/stats" + + test("sarscov2 - bam") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end: false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + path(workflow.out.bam[0][1]), + path(workflow.out.bai[0][1]), + 
path(workflow.out.flagstat[0][1]), + path(workflow.out.idxstats[0][1]), + path(workflow.out.stats[0][1]), + ).match("sarscov2 - bam") }, + { assert path(workflow.out.metrics.get(0).get(1)).getText().contains("97") } + ) + } + } + + test("homo_sapiens - cram") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + file(workflow.out.cram[0][1]).name, + path(workflow.out.crai[0][1]), + path(workflow.out.flagstat[0][1]), + path(workflow.out.idxstats[0][1]), + path(workflow.out.stats[0][1]), + ).match("homo_sapiens - cram") }, + { assert path(workflow.out.metrics.get(0).get(1)).getText().contains("0.999986") } + ) + } + } + +} diff --git a/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap new file mode 100644 index 00000000..caf4ac8a --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap @@ -0,0 +1,30 @@ +{ + "homo_sapiens - cram": { + "content": [ + "test.cram", + "test.cram.crai:md5,78d47ba01ac4e05f3ae1e353902a989e", + "test.flagstat:md5,93b0ef463df947ede1f42ff60396c34d", + "test.idxstats:md5,e179601fa7b8ebce81ac3765206f6c15", + "test.stats:md5,c2f74a4d9b2377bcf4f4f184da3801af" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T20:45:38.364189" + }, + "sarscov2 - bam": { + "content": [ + 
"test.bam:md5,3091fe6ba1b7530f382fe40b9fd8f45b", + "test.bam.bai:md5,4d3ae8d013444b55e17aa0149a2ab404", + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783", + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2", + "test.stats:md5,d7796222a087b9bb97f631f1c21b9c95" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-21T11:38:08.434529" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_markduplicates_picard/tests/tags.yml b/subworkflows/nf-core/bam_markduplicates_picard/tests/tags.yml new file mode 100644 index 00000000..10b85270 --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_markduplicates_picard: + - subworkflows/nf-core/bam_markduplicates_picard/** diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf new file mode 100644 index 00000000..b716375b --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf @@ -0,0 +1,50 @@ +// +// Sort, index BAM file and run samtools stats, flagstat and idxstats +// + +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' + +workflow BAM_SORT_STATS_SAMTOOLS { + take: + ch_bam // channel: [ val(meta), [ bam ] ] + ch_fasta // channel: [ val(meta), path(fasta) ] + + main: + + ch_versions = Channel.empty() + + SAMTOOLS_SORT ( ch_bam, ch_fasta ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + + SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + SAMTOOLS_SORT.out.bam + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .map { + meta, bam, bai, csi -> + if (bai) { + [ meta, bam, bai ] + } else { + [ meta, bam, csi 
] + } + } + .set { ch_bam_bai } + + BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] + + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml new file mode 100644 index 00000000..e01f9ccf --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml @@ -0,0 +1,70 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_sort_stats_samtools +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +components: + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_stats_samtools +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" +maintainers: + - "@drpatelh" + - "@ewels" diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test new file mode 100644 index 00000000..75b5b934 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test @@ -0,0 +1,82 @@ +nextflow_workflow { + + name "Test Workflow BAM_SORT_STATS_SAMTOOLS" + script "../main.nf" + workflow "BAM_SORT_STATS_SAMTOOLS" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/bam_sort_stats_samtools" + tag "bam_sort_stats_samtools" + tag "subworkflows/bam_stats_samtools" + tag "bam_stats_samtools" + tag "samtools" + tag "samtools/index" + tag "samtools/sort" + tag "samtools/stats" + tag "samtools/idxstats" + tag "samtools/flagstat" + + test("test_bam_sort_stats_samtools_single_end") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot(workflow.out.stats).match("test_bam_sort_stats_samtools_single_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_sort_stats_samtools_single_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_sort_stats_samtools_single_end_idxstats") } + ) + } + } + + test("test_bam_sort_stats_samtools_paired_end") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot(workflow.out.stats).match("test_bam_sort_stats_samtools_paired_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_sort_stats_samtools_paired_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_sort_stats_samtools_paired_end_idxstats") } + ) + } + } +} diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap new file mode 100644 index 00000000..6645a092 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap @@ -0,0 +1,110 @@ +{ + "test_bam_sort_stats_samtools_paired_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2023-10-22T20:25:03.687121177" + }, + "test_bam_sort_stats_samtools_paired_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2023-10-22T20:25:03.709648916" + }, + "test_bam_sort_stats_samtools_single_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,cb0bf2b79de52fdf0c61e80efcdb0bb4" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:44:38.553256801" + }, + "test_bam_sort_stats_samtools_paired_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d7796222a087b9bb97f631f1c21b9c95" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:44:48.355870518" + }, + "test_bam_sort_stats_samtools_single_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-01-18T17:10:02.84631" + }, + "test_bam_sort_stats_samtools_single_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-01-18T17:10:02.829756" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml new file mode 100644 index 00000000..30b69d6a --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml 
@@ -0,0 +1,2 @@ +subworkflows/bam_sort_stats_samtools: + - subworkflows/nf-core/bam_sort_stats_samtools/** diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf new file mode 100644 index 00000000..44d4c010 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -0,0 +1,32 @@ +// +// Run SAMtools stats, flagstat and idxstats +// + +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { SAMTOOLS_IDXSTATS } from '../../../modules/nf-core/samtools/idxstats/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' + +workflow BAM_STATS_SAMTOOLS { + take: + ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + ch_fasta // channel: [ val(meta), path(fasta) ] + + main: + ch_versions = Channel.empty() + + SAMTOOLS_STATS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) + + SAMTOOLS_FLAGSTAT ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + + SAMTOOLS_IDXSTATS ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) + + emit: + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/bam_stats_samtools/meta.yml b/subworkflows/nf-core/bam_stats_samtools/meta.yml new file mode 100644 index 00000000..809bf736 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/meta.yml @@ -0,0 +1,43 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_stats_samtools +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - 
cram +components: + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - ch_bam_bai: + description: | + The input channel containing the BAM/CRAM and it's index + Structure: [ val(meta), path(bam), path(bai) ] + - ch_fasta: + description: | + Reference genome fasta file + Structure: [ path(fasta) ] +output: + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats)] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test new file mode 100644 index 00000000..c8b21f28 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test @@ -0,0 +1,108 @@ +nextflow_workflow { + + name "Test Workflow BAM_STATS_SAMTOOLS" + script "../main.nf" + workflow "BAM_STATS_SAMTOOLS" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "bam_stats_samtools" + tag "subworkflows/bam_stats_samtools" + tag "samtools" + tag "samtools/flagstat" + tag "samtools/idxstats" + tag "samtools/stats" + + test("test_bam_stats_samtools_single_end") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_single_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_single_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_single_end_idxstats") } + ) + } + } + + test("test_bam_stats_samtools_paired_end") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_paired_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_paired_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_paired_end_idxstats") } + ) + } + } + + test("test_bam_stats_samtools_paired_end_cram") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_paired_end_cram_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_paired_end_cram_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_paired_end_cram_idxstats") } + ) + } + } + +} diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap new file mode 100644 index 00000000..bf0b0c69 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap @@ -0,0 +1,164 @@ +{ + "test_bam_stats_samtools_paired_end_cram_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,a53f3d26e2e9851f7d528442bbfe9781" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2023-11-06T09:31:26.194017574" + }, + "test_bam_stats_samtools_paired_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,ddaf8f33fe9c1ebe9b06933213aec8ed" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:45:06.230091746" + }, + "test_bam_stats_samtools_paired_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-01-18T17:17:27.717482" + }, + "test_bam_stats_samtools_single_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2023-11-06T09:26:10.340046381" + }, + 
"test_bam_stats_samtools_paired_end_cram_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,e179601fa7b8ebce81ac3765206f6c15" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2023-11-06T09:31:26.207052003" + }, + "test_bam_stats_samtools_single_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,dc178e1a4956043aba8abc83e203521b" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:44:57.442208382" + }, + "test_bam_stats_samtools_paired_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-01-18T17:17:27.726719" + }, + "test_bam_stats_samtools_single_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2023-11-06T09:26:10.349439801" + }, + "test_bam_stats_samtools_paired_end_cram_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d3345c4887f4a9ea4f7f56405b495db0" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:45:14.997164209" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml new file mode 100644 index 00000000..ec2f2d68 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_stats_samtools: + - subworkflows/nf-core/bam_stats_samtools/** diff --git a/subworkflows/nf-core/fastq_align_star/main.nf b/subworkflows/nf-core/fastq_align_star/main.nf new file 
mode 100644 index 00000000..00d553d9 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_star/main.nf @@ -0,0 +1,68 @@ +include { STAR_ALIGN } from '../../../modules/nf-core/star/align/main' +include { BAM_SORT_STATS_SAMTOOLS as BAM_SORT_STATS_SAMTOOLS_GENOME } from '../bam_sort_stats_samtools/main' +include { BAM_SORT_STATS_SAMTOOLS as BAM_SORT_STATS_SAMTOOLS_TRANSCRIPTOME } from '../bam_sort_stats_samtools/main' + + +workflow FASTQ_ALIGN_STAR { + + take: + ch_reads // channel: [ val(meta), [ path(reads) ] ] + ch_index // channel: [ path(index) ] + ch_gtf // channel: [ path(gtf) ] + val_star_ignore_sjdbgtf // boolean: when using pre-built STAR indices do not re-extract and use splice junctions from the GTF file + val_seq_platform // string : sequencing platform + val_seq_center // string : sequencing center + ch_fasta // channel: [ val(meta), path(fasta) ] + ch_transcripts_fasta // channel: [ val(meta), path(fasta) ] + + main: + + ch_versions = Channel.empty() + + // + // Map reads with STAR + // + STAR_ALIGN ( ch_reads, ch_index, ch_gtf, val_star_ignore_sjdbgtf, val_seq_platform, val_seq_center ) + ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS_GENOME ( STAR_ALIGN.out.bam, ch_fasta ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS_GENOME.out.versions) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + // Only runs when '--quantMode TranscriptomeSAM' is set in args and + // STAR_ALIGN.out.bam_transcript is populated + // + + BAM_SORT_STATS_SAMTOOLS_TRANSCRIPTOME ( STAR_ALIGN.out.bam_transcript, ch_transcripts_fasta ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS_TRANSCRIPTOME.out.versions) + + emit: + + orig_bam = STAR_ALIGN.out.bam // channel: [ val(meta), path(bam) ] + log_final = STAR_ALIGN.out.log_final // channel: [ val(meta), path(log_final) ] + log_out = STAR_ALIGN.out.log_out // 
channel: [ val(meta), path(log_out) ] + log_progress = STAR_ALIGN.out.log_progress // channel: [ val(meta), path(log_progress) ] + bam_sorted = STAR_ALIGN.out.bam_sorted // channel: [ val(meta), path(bam) ] + fastq = STAR_ALIGN.out.fastq // channel: [ val(meta), path(fastq) ] + tab = STAR_ALIGN.out.tab // channel: [ val(meta), path(tab) ] + orig_bam_transcript = STAR_ALIGN.out.bam_transcript // channel: [ val(meta), path(bam) ] + + bam = BAM_SORT_STATS_SAMTOOLS_GENOME.out.bam // channel: [ val(meta), path(bam) ] + bai = BAM_SORT_STATS_SAMTOOLS_GENOME.out.bai // channel: [ val(meta), path(bai) ] + stats = BAM_SORT_STATS_SAMTOOLS_GENOME.out.stats // channel: [ val(meta), path(stats) ] + flagstat = BAM_SORT_STATS_SAMTOOLS_GENOME.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = BAM_SORT_STATS_SAMTOOLS_GENOME.out.idxstats // channel: [ val(meta), path(idxstats) ] + + bam_transcript = BAM_SORT_STATS_SAMTOOLS_TRANSCRIPTOME.out.bam // channel: [ val(meta), path(bam) ] + bai_transcript = BAM_SORT_STATS_SAMTOOLS_TRANSCRIPTOME.out.bai // channel: [ val(meta), path(bai) ] + stats_transcript = BAM_SORT_STATS_SAMTOOLS_TRANSCRIPTOME.out.stats // channel: [ val(meta), path(stats) ] + flagstat_transcript = BAM_SORT_STATS_SAMTOOLS_TRANSCRIPTOME.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats_transcript = BAM_SORT_STATS_SAMTOOLS_TRANSCRIPTOME.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/fastq_align_star/meta.yml b/subworkflows/nf-core/fastq_align_star/meta.yml new file mode 100644 index 00000000..796d3e4b --- /dev/null +++ b/subworkflows/nf-core/fastq_align_star/meta.yml @@ -0,0 +1,133 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_align_star" +description: Align reads to a reference genome using STAR then sort with samtools +keywords: + - align + - 
fasta + - genome + - reference +components: + - star/align + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_sort_stats_samtools +input: + - ch_reads: + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + Structure: [ val(meta), [ path(reads) ] ] + - ch_index: + type: directory + description: STAR genome index + pattern: "star" + - ch_gtf: + type: file + description: | + GTF file used to set the splice junctions with the --sjdbGTFfile flag + pattern: "*.gtf" + - val_star_ignore_sjdbgtf: + type: boolean + description: | + If true, splice junctions are not re-extracted from the GTF file (the --sjdbGTFfile flag is not set) + pattern: "true|false" + - val_seq_platform: + type: string + description: | + Sequencing platform to be added to the bam header using the --outSAMattrRGline flag + - val_seq_center: + type: string + description: | + Sequencing center to be added to the bam header using the --outSAMattrRGline flag + - ch_fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa,fna}" + - ch_transcripts_fasta: + type: file + description: Optional reference transcriptome fasta file + pattern: "*.{fasta,fa,fna}" +output: + - orig_bam: + description: | + Output BAM file containing read alignments + Structure: [ val(meta), path(bam) ] + - log_final: + description: | + STAR final log file + Structure: [ val(meta), path(log_final) ] + - log_out: + description: | + STAR log out file + Structure: [ val(meta), path(log_out) ] + - log_progress: + description: | + STAR log progress file + Structure: [ val(meta), path(log_progress) ] + - bam_sorted: + description: | + Sorted BAM file of read alignments (optional) + Structure: [ val(meta), path(bam) ] + - orig_bam_transcript: + description: | + Output BAM file of transcriptome alignment (optional) + Structure: [ val(meta), path(bam) ] + - fastq: + description: | + Unmapped FastQ files (optional) + Structure: [ val(meta), path(fastq) ] + - tab: + 
description: | + STAR output tab file(s) (optional) + Structure: [ val(meta), path(tab) ] + - bam: + description: | + BAM file ordered by samtools + Structure: [ val(meta), path(bam) ] + - bai: + description: | + BAI index of the ordered BAM file + Structure: [ val(meta), path(bai) ] + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats) ] + - bam_transcript: + description: | + Transcriptome-level BAM file ordered by samtools (optional) + Structure: [ val(meta), path(bam) ] + - bai_transcript: + description: | + Transcriptome-level BAI index of the ordered BAM file (optional) + Structure: [ val(meta), path(bai) ] + - stats_transcript: + description: | + Transcriptome-level file containing samtools stats output (optional) + Structure: [ val(meta), path(stats) ] + - flagstat_transcript: + description: | + Transcriptome-level file containing samtools flagstat output (optional) + Structure: [ val(meta), path(flagstat) ] + - idxstats_transcript: + description: | + Transcriptome-level file containing samtools idxstats output (optional) + Structure: [ val(meta), path(idxstats) ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@JoseEspinosa" +maintainers: + - "@JoseEspinosa" diff --git a/subworkflows/nf-core/fastq_align_star/nextflow.config b/subworkflows/nf-core/fastq_align_star/nextflow.config new file mode 100644 index 00000000..926eae71 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_star/nextflow.config @@ -0,0 +1,10 @@ +// IMPORTANT: Add this configuration to your modules.config + +process { + withName: ".*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS_GENOME:.*" { + ext.prefix = {"${meta.id}_genome"} + } + withName: 
".*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS_TRANSCRIPTOME:.*" { + ext.prefix = {"${meta.id}_transcriptome"} + } +} diff --git a/subworkflows/nf-core/fastq_align_star/tests/main.nf.test b/subworkflows/nf-core/fastq_align_star/tests/main.nf.test new file mode 100644 index 00000000..2c512c96 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_star/tests/main.nf.test @@ -0,0 +1,262 @@ +nextflow_workflow { + + name "Test Subworkflow FASTQ_ALIGN_STAR" + script "../main.nf" + workflow "FASTQ_ALIGN_STAR" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_align_star" + tag "star/align" + tag "star/genomegenerate" + tag "rsem/preparereference" + tag "subworkflows/bam_sort_stats_samtools" + + setup { + run("STAR_GENOMEGENERATE") { + script "../../../../modules/nf-core/star/genomegenerate/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + test("homo_sapiens - fastq - single_end") { + config "./nextflow.config" + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = true // star_ignore_sjdbgtf + input[4] = 'illumina' // seq_platform + input[5] = false // seq_center + input[6] = Channel.of([ + [ id:'test_fasta' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', 
checkIfExists: true) ] + ]) + input[7] = Channel.of([[:], []]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.orig_bam).match('orig_bam_single_end')}, + { assert snapshot(workflow.out.bam_sorted).match('bam_sorted_single_end')}, + { assert snapshot(workflow.out.fastq).match('fastq_single_end')}, + { assert snapshot(workflow.out.tab).match('tab_single_end')}, + { assert snapshot(workflow.out.orig_bam_transcript).match('orig_bam_transcript_single_end')}, + { assert snapshot(workflow.out.bam).match('bam_single_end')}, + { assert snapshot(workflow.out.bai).match('bai_single_end')}, + { assert snapshot(workflow.out.stats).match('stats_single_end')}, + { assert snapshot(workflow.out.flagstat).match('flagstat_single_end')}, + { assert snapshot(workflow.out.idxstats).match('idxstats_single_end')}, + { assert snapshot(workflow.out.bam_transcript).match('bam_transcript_single_end')}, + { assert snapshot(workflow.out.bai_transcript).match('bai_transcript_single_end')}, + { assert snapshot(workflow.out.stats_transcript).match('stats_transcript_single_end')}, + { assert snapshot(workflow.out.flagstat_transcript).match('flagstat_transcript_single_end')}, + { assert snapshot(workflow.out.idxstats_transcript).match('idxstats_transcript_single_end')}, + { assert path(workflow.out.log_out.get(0).get(1)).exists() }, + { assert path(workflow.out.log_final.get(0).get(1)).exists() }, + { assert path(workflow.out.log_progress.get(0).get(1)).exists() } + ) + } + } + + test("homo_sapiens - fastq - paired_end") { + config "./nextflow.config" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = 
STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = true // star_ignore_sjdbgtf + input[4] = 'illumina' // seq_platform + input[5] = false // seq_center + input[6] = Channel.of([ + [ id:'test_fasta' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[7] = Channel.of([[:], []]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.orig_bam).match('orig_bam_paired_end')}, + { assert snapshot(workflow.out.bam_sorted).match('bam_sorted_paired_end')}, + { assert snapshot(workflow.out.fastq).match('fastq_paired_end')}, + { assert snapshot(workflow.out.tab).match('tab_paired_end')}, + { assert snapshot(workflow.out.orig_bam_transcript).match('orig_bam_transcript_paired_end')}, + { assert snapshot(workflow.out.bam).match('bam_paired_end')}, + { assert snapshot(workflow.out.bai).match('bai_paired_end')}, + { assert snapshot(workflow.out.stats).match('stats_paired_end')}, + { assert snapshot(workflow.out.flagstat).match('flagstat_paired_end')}, + { assert snapshot(workflow.out.idxstats).match('idxstats_paired_end')}, + { assert snapshot(workflow.out.bam_transcript).match('bam_transcript_paired_end')}, + { assert snapshot(workflow.out.bai_transcript).match('bai_transcript_paired_end')}, + { assert snapshot(workflow.out.stats_transcript).match('stats_transcript_paired_end')}, + { assert snapshot(workflow.out.flagstat_transcript).match('flagstat_transcript_paired_end')}, + { assert snapshot(workflow.out.idxstats_transcript).match('idxstats_transcript_paired_end')}, + { assert snapshot(workflow.out.idxstats_transcript).match('versions_paired_end')}, + { assert path(workflow.out.log_out.get(0).get(1)).exists() }, + { assert path(workflow.out.log_final.get(0).get(1)).exists() }, + { 
assert path(workflow.out.log_progress.get(0).get(1)).exists() } + ) + } + } + + test("homo_sapiens - fastq - paired_end - with_transcriptome") { + config "./with_transcripts.config" + + setup { + run("RSEM_PREPAREREFERENCE") { + script "../../../../modules/nf-core/rsem/preparereference/main.nf" + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) + input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)) + """ + } + } + } + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = true // star_ignore_sjdbgtf + input[4] = 'illumina' // seq_platform + input[5] = false // seq_center + input[6] = Channel.of([ + [ id:'test_fasta' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[7] = RSEM_PREPAREREFERENCE.out.transcript_fasta.map{[[:], it]} + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.orig_bam).match('orig_bam_paired_end_with_transcriptome')}, + { assert snapshot(workflow.out.bam_sorted).match('bam_sorted_paired_end_with_transcriptome')}, + { assert snapshot(workflow.out.fastq).match('fastq_paired_end_with_transcriptome')}, + { assert snapshot(workflow.out.tab).match('tab_paired_end_with_transcriptome')}, + { assert 
snapshot(workflow.out.orig_bam_transcript).match('orig_bam_transcript_paired_end_with_transcriptome')}, + { assert snapshot(workflow.out.bam).match('bam_paired_end_with_transcriptome')}, + { assert snapshot(workflow.out.bai).match('bai_paired_end_with_transcriptome')}, + { assert snapshot(workflow.out.stats).match('stats_paired_end_with_transcriptome')}, + { assert snapshot(workflow.out.flagstat).match('flagstat_paired_end_with_transcriptome')}, + { assert snapshot(workflow.out.idxstats).match('idxstats_paired_end_with_transcriptome')}, + { assert snapshot(workflow.out.bam_transcript).match('bam_transcript_paired_end_with_transcriptome')}, + { assert snapshot(workflow.out.bai_transcript).match('bai_transcript_paired_end_with_transcriptome')}, + { assert snapshot(workflow.out.stats_transcript).match('stats_transcript_paired_end_with_transcriptome')}, + { assert snapshot(workflow.out.flagstat_transcript).match('flagstat_transcript_paired_end_with_transcriptome')}, + { assert snapshot(workflow.out.idxstats_transcript).match('idxstats_transcript_paired_end_with_transcriptome')}, + { assert snapshot(workflow.out.idxstats_transcript).match('versions_paired_end_with_transcriptome')}, + { assert path(workflow.out.log_out.get(0).get(1)).exists() }, + { assert path(workflow.out.log_final.get(0).get(1)).exists() }, + { assert path(workflow.out.log_progress.get(0).get(1)).exists() } + ) + } + } + + test("homo_sapiens - fastq - paired_end - transcripts - no_transcriptome") { + config "./with_transcripts.config" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], // meta map + 
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = true // star_ignore_sjdbgtf + input[4] = 'illumina' // seq_platform + input[5] = false // seq_center + input[6] = Channel.of([ + [ id:'test_fasta' ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[7] = Channel.of([[:], []]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.orig_bam).match('orig_bam_paired_end_transcripts_no_transcriptome')}, + { assert snapshot(workflow.out.bam_sorted).match('bam_sorted_paired_end_transcripts_no_transcriptome')}, + { assert snapshot(workflow.out.fastq).match('fastq_paired_end_transcripts_no_transcriptome')}, + { assert snapshot(workflow.out.tab).match('tab_paired_end_transcripts_no_transcriptome')}, + { assert snapshot(workflow.out.orig_bam_transcript).match('orig_bam_transcript_paired_end_transcripts_no_transcriptome')}, + { assert snapshot(workflow.out.bam).match('bam_paired_end_transcripts_no_transcriptome')}, + { assert snapshot(workflow.out.bai).match('bai_paired_end_transcripts_no_transcriptome')}, + { assert snapshot(workflow.out.stats).match('stats_paired_end_transcripts_no_transcriptome')}, + { assert snapshot(workflow.out.flagstat).match('flagstat_paired_end_transcripts_no_transcriptome')}, + { assert snapshot(workflow.out.idxstats).match('idxstats_paired_end_transcripts_no_transcriptome')}, + { assert snapshot(workflow.out.bam_transcript).match('bam_transcript_paired_end_transcripts_no_transcriptome')}, + { assert snapshot(workflow.out.bai_transcript).match('bai_transcript_paired_end_transcripts_no_transcriptome')}, + { assert snapshot(workflow.out.stats_transcript).match('stats_transcript_paired_end_transcripts_no_transcriptome')}, + { assert snapshot(workflow.out.flagstat_transcript).match('flagstat_transcript_paired_end_transcripts_no_transcriptome')}, + 
{ assert snapshot(workflow.out.idxstats_transcript).match('idxstats_transcript_paired_end_transcripts_no_transcriptome')}, + { assert snapshot(workflow.out.idxstats_transcript).match('versions_paired_end_transcripts_no_transcriptome')}, + { assert path(workflow.out.log_out.get(0).get(1)).exists() }, + { assert path(workflow.out.log_final.get(0).get(1)).exists() }, + { assert path(workflow.out.log_progress.get(0).get(1)).exists() } + ) + } + } +} diff --git a/subworkflows/nf-core/fastq_align_star/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_align_star/tests/main.nf.test.snap new file mode 100644 index 00000000..989d38a6 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_star/tests/main.nf.test.snap @@ -0,0 +1,1010 @@ +{ + "flagstat_transcript_paired_end_with_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.flagstat:md5,a204dee59ef8b7cd0f7d952a80119b77" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T11:04:06.836115665" + }, + "orig_bam_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.out.bam:md5,51d2ef198d5725978a4b6da6fbec17dd" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:37:37.493635239" + }, + "tab_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:37:37.62565966" + }, + "stats_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam.stats:md5,2b8eb6967b68f03584cc242499658e92" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T11:01:51.011841774" + }, + "bam_sorted_single_end": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", 
+ "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:36:01.366823434" + }, + "idxstats_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.idxstats:md5,107ca94dd426cc44db316f0d402307c5" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:37:37.799888732" + }, + "fastq_paired_end_with_transcriptome": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T11:04:06.404150611" + }, + "stats_transcript_single_end": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:36:01.92597042" + }, + "idxstats_transcript_paired_end_with_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.idxstats:md5,4030919b4a05393dcd7e699d72725803" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T11:04:06.869017036" + }, + "flagstat_paired_end_transcripts_no_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.flagstat:md5,db0e25cd0b37d3030e807846c022199e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:39:50.673278468" + }, + "bam_transcript_single_end": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:36:01.816422587" + }, + "bai_transcript_single_end": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:36:01.889271918" + }, + "idxstats_transcript_paired_end_transcripts_no_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.idxstats:md5,4030919b4a05393dcd7e699d72725803" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": 
"23.10.1" + }, + "timestamp": "2024-02-15T19:39:50.933689327" + }, + "idxstats_paired_end_transcripts_no_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.idxstats:md5,107ca94dd426cc44db316f0d402307c5" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:39:50.717133215" + }, + "bai_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.bai:md5,5e05259dc3e00965ed3ffaaf5c3b7e2a" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-01T14:10:08.920576" + }, + "stats_paired_end_with_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.stats:md5,39273789abce501dc706fa94bbd0079d" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T11:04:06.583114732" + }, + "tab_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.SJ.out.tab:md5,75a516ab950fb958f40b29996474949c" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T11:01:50.81938497" + }, + "orig_bam_paired_end_transcripts_no_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.out.bam:md5,8bb13b5b1530a4682d662f1a5c40ab88" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:39:50.324025875" + }, + "tab_paired_end_transcripts_no_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:39:50.450949052" + }, + "bam_transcript_paired_end": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + 
"timestamp": "2024-02-15T19:37:37.848680274" + }, + "bam_transcript_paired_end_transcripts_no_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,304982537be52d39f99c33e6079297ff" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-01T14:12:26.792244" + }, + "bai_transcript_paired_end_with_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.bai:md5,6cf664f6eeb968efdebbc44edf5a4bcb" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-01T14:11:20.001206" + }, + "bai_paired_end_with_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.bai:md5,8bd17c254a618d5ef0cc2642abc00e40" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-01T14:11:19.900054" + }, + "idxstats_transcript_paired_end": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:37:37.998050296" + }, + "versions_paired_end_transcripts_no_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.idxstats:md5,4030919b4a05393dcd7e699d72725803" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:39:51.008404374" + }, + "bam_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam:md5,c1c390806279fee5f7f904c1bddd6937" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-01T14:09:07.255318" + }, + "idxstats_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam.idxstats:md5,0d5a12e3e3ffdb15f1c6c50f02f22575" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": 
"2024-02-16T11:01:51.104899639" + }, + "orig_bam_transcript_paired_end": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:37:37.646770561" + }, + "stats_transcript_paired_end_with_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.stats:md5,8b4b458a77eb4ffb04a568de36ecb677" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T18:17:49.016776458" + }, + "orig_bam_transcript_paired_end_transcripts_no_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.toTranscriptome.out.bam:md5,deb116674bf9a17a8082dd4899b0f11f" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:39:50.500316187" + }, + "orig_bam_transcript_paired_end_with_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.toTranscriptome.out.bam:md5,deb116674bf9a17a8082dd4899b0f11f" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T11:04:06.46731827" + }, + "orig_bam_paired_end_with_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.Aligned.out.bam:md5,8bb13b5b1530a4682d662f1a5c40ab88" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T11:04:06.343819207" + }, + "bam_transcript_paired_end_with_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,f4b96ef58a9e762495e8a3ed27541ddf" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-01T14:11:19.947042" + }, + "fastq_single_end": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:36:01.387908106" + }, + 
"stats_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.stats:md5,39273789abce501dc706fa94bbd0079d" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:37:37.71383862" + }, + "flagstat_paired_end_with_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.flagstat:md5,db0e25cd0b37d3030e807846c022199e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T11:04:06.670712229" + }, + "flagstat_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam.flagstat:md5,075e7a684a7ceb5fd1dae154f823128a" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T11:01:51.065241319" + }, + "bam_paired_end_with_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,0e9f3d3c04cd1916f070e37aee7b1479" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-01T14:11:19.850548" + }, + "flagstat_transcript_paired_end_transcripts_no_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.flagstat:md5,a204dee59ef8b7cd0f7d952a80119b77" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:39:50.890160347" + }, + "flagstat_transcript_paired_end": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:37:37.97328443" + }, + "versions_paired_end_with_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.idxstats:md5,4030919b4a05393dcd7e699d72725803" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T11:04:06.905277179" + }, + 
"bam_sorted_paired_end_transcripts_no_transcriptome": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:39:50.369745857" + }, + "bam_sorted_paired_end": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:37:37.548813329" + }, + "stats_paired_end_transcripts_no_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.stats:md5,39273789abce501dc706fa94bbd0079d" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:39:50.625556349" + }, + "bai_transcript_paired_end_transcripts_no_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.bai:md5,b713ac63c6114db4482fde48a87a3b55" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-01T14:12:26.840899" + }, + "orig_bam_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.Aligned.out.bam:md5,cd655e6c7309158d42fcfb202a6100e7" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T11:01:50.699345543" + }, + "tab_paired_end_with_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.SJ.out.tab:md5,844af19ab0fc8cd9a3f75228445aca0d" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T11:04:06.431755269" + }, + "bam_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,1d2371a1492623c665e7df1f377d30b1" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-01T14:10:08.892059" + }, + "bam_paired_end_transcripts_no_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.sorted.bam:md5,0e9f3d3c04cd1916f070e37aee7b1479" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-01T14:12:26.692602" + }, + "stats_transcript_paired_end": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:37:37.945610051" + }, + "versions_paired_end": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:37:38.025358383" + }, + "bai_paired_end_transcripts_no_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.bai:md5,8bd17c254a618d5ef0cc2642abc00e40" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-01T14:12:26.731803" + }, + "idxstats_paired_end_with_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.idxstats:md5,107ca94dd426cc44db316f0d402307c5" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T11:04:06.702727344" + }, + "orig_bam_transcript_single_end": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:36:01.449184828" + }, + "idxstats_transcript_single_end": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:36:01.992614899" + }, + "bai_transcript_paired_end": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:37:37.897935922" + }, + "flagstat_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.flagstat:md5,db0e25cd0b37d3030e807846c022199e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:37:37.74949925" + }, + 
"stats_transcript_paired_end_transcripts_no_transcriptome": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.stats:md5,fa2f10820ffe578ebe7d5935155b4366" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:39:50.847385522" + }, + "bam_sorted_paired_end_with_transcriptome": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-16T11:04:06.3779095" + }, + "bai_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam.bai:md5,73c7f3fe35663c287cb421f1c8b761ae" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-01T14:09:07.343206" + }, + "fastq_paired_end": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:37:37.594559814" + }, + "flagstat_transcript_single_end": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:36:01.971850654" + }, + "fastq_paired_end_transcripts_no_transcriptome": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-15T19:39:50.410098606" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_align_star/tests/nextflow.config b/subworkflows/nf-core/fastq_align_star/tests/nextflow.config new file mode 100644 index 00000000..6af49d58 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_star/tests/nextflow.config @@ -0,0 +1,21 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat' + } + + withName: '.*:BAM_SORT_STATS_SAMTOOLS_.*:SAMTOOLS_.*' { + ext.prefix = { 
"${meta.id}.sorted" } + } + + withName: '.*:BAM_SORT_STATS_SAMTOOLS_.*:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.sorted.bam" } + } + +} diff --git a/subworkflows/nf-core/fastq_align_star/tests/tags.yml b/subworkflows/nf-core/fastq_align_star/tests/tags.yml new file mode 100644 index 00000000..a919a2d1 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_star/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/fastq_align_star: + - subworkflows/nf-core/fastq_align_star/** diff --git a/subworkflows/nf-core/fastq_align_star/tests/with_transcripts.config b/subworkflows/nf-core/fastq_align_star/tests/with_transcripts.config new file mode 100644 index 00000000..62f15250 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_star/tests/with_transcripts.config @@ -0,0 +1,21 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 9' + } + + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --quantMode TranscriptomeSAM' + } + + withName: '.*:BAM_SORT_STATS_SAMTOOLS_.*:SAMTOOLS_.*' { + ext.prefix = { "${meta.id}.sorted" } + } + + withName: '.*:BAM_SORT_STATS_SAMTOOLS_.*:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.sorted.bam" } + } + +} diff --git a/subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf b/subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf new file mode 100644 index 00000000..291eddc1 --- /dev/null +++ b/subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf @@ -0,0 +1,45 @@ +// +// Run VEP to annotate VCF files +// + +include { ENSEMBLVEP_VEP } from '../../../modules/nf-core/ensemblvep/vep/main' +include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' + +workflow VCF_ANNOTATE_ENSEMBLVEP { + take: + ch_vcf // channel: [ val(meta), path(vcf), [path(custom_file1), path(custom_file2)... 
(optionnal)]] + ch_fasta // channel: [ val(meta2), path(fasta) ] (optional) + val_genome // value: genome to use + val_species // value: species to use + val_cache_version // value: cache version to use + ch_cache // channel: [ val(meta3), path(cache) ] (optional) + ch_extra_files // channel: [ path(file1), path(file2)... ] (optional) + + main: + ch_versions = Channel.empty() + + ENSEMBLVEP_VEP( + ch_vcf, + val_genome, + val_species, + val_cache_version, + ch_cache, + ch_fasta, + ch_extra_files + ) + + TABIX_TABIX(ENSEMBLVEP_VEP.out.vcf) + + ch_vcf_tbi = ENSEMBLVEP_VEP.out.vcf.join(TABIX_TABIX.out.tbi, failOnDuplicate: true, failOnMismatch: true) + + // Gather versions of all tools used + ch_versions = ch_versions.mix(ENSEMBLVEP_VEP.out.versions) + ch_versions = ch_versions.mix(TABIX_TABIX.out.versions) + + emit: + vcf_tbi = ch_vcf_tbi // channel: [ val(meta), path(vcf), path(tbi) ] + json = ENSEMBLVEP_VEP.out.json // channel: [ val(meta), path(json) ] + tab = ENSEMBLVEP_VEP.out.tab // channel: [ val(meta), path(tab) ] + reports = ENSEMBLVEP_VEP.out.report // channel: [ path(html) ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/vcf_annotate_ensemblvep/meta.yml b/subworkflows/nf-core/vcf_annotate_ensemblvep/meta.yml new file mode 100644 index 00000000..15d42da2 --- /dev/null +++ b/subworkflows/nf-core/vcf_annotate_ensemblvep/meta.yml @@ -0,0 +1,65 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: vcf_annotate_ensemblvep +description: Perform annotation with ensemblvep and bgzip + tabix index the resulting VCF file +keywords: + - vcf + - annotation + - ensemblvep +components: + - ensemblvep/vep + - tabix/tabix +input: + - ch_vcf: + description: | + vcf file to annotate + Structure: [ val(meta), path(vcf), [path(custom_file1), path(custom_file2)... 
(optionnal)] ] + - ch_fasta: + description: | + Reference genome fasta file (optional) + Structure: [ val(meta2), path(fasta) ] + - val_genome: + type: string + description: genome to use + - val_species: + type: string + description: species to use + - val_cache_version: + type: integer + description: cache version to use + - ch_cache: + description: | + the root cache folder for ensemblvep (optional) + Structure: [ val(meta3), path(cache) ] + - ch_extra_files: + description: | + any extra files needed by plugins for ensemblvep (optional) + Structure: [ path(file1), path(file2)... ] +output: + - vcf_tbi: + description: | + Compressed vcf file + tabix index + Structure: [ val(meta), path(vcf), path(tbi) ] + - json: + description: | + json file + Structure: [ val(meta), path(json) ] + - tab: + description: | + tab file + Structure: [ val(meta), path(tab) ] + - reports: + type: file + description: html reports + pattern: "*.html" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@maxulysse" + - "@matthdsm" + - "@nvnieuwk" +maintainers: + - "@maxulysse" + - "@matthdsm" + - "@nvnieuwk" diff --git a/subworkflows/nf-core/vcf_annotate_snpeff/main.nf b/subworkflows/nf-core/vcf_annotate_snpeff/main.nf new file mode 100644 index 00000000..3570a5b7 --- /dev/null +++ b/subworkflows/nf-core/vcf_annotate_snpeff/main.nf @@ -0,0 +1,28 @@ +// +// Run SNPEFF to annotate VCF files +// + +include { SNPEFF_SNPEFF } from '../../../modules/nf-core/snpeff/snpeff/main.nf' +include { TABIX_BGZIPTABIX } from '../../../modules/nf-core/tabix/bgziptabix/main.nf' + +workflow VCF_ANNOTATE_SNPEFF { + take: + ch_vcf // channel: [ val(meta), path(vcf) ] + val_snpeff_db // string: db version to use + ch_snpeff_cache // channel: [ path(cache) ] (optional) + + main: + ch_versions = Channel.empty() + + SNPEFF_SNPEFF(ch_vcf, val_snpeff_db, ch_snpeff_cache) + TABIX_BGZIPTABIX(SNPEFF_SNPEFF.out.vcf) + + // Gather versions of all tools used 
+ ch_versions = ch_versions.mix(SNPEFF_SNPEFF.out.versions) + ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) + + emit: + vcf_tbi = TABIX_BGZIPTABIX.out.gz_tbi // channel: [ val(meta), path(vcf), path(tbi) ] + reports = SNPEFF_SNPEFF.out.report // channel: [ path(html) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/vcf_annotate_snpeff/meta.yml b/subworkflows/nf-core/vcf_annotate_snpeff/meta.yml new file mode 100644 index 00000000..c8d5a635 --- /dev/null +++ b/subworkflows/nf-core/vcf_annotate_snpeff/meta.yml @@ -0,0 +1,40 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: vcf_annotate_snpeff +description: Perform annotation with snpEff and bgzip + tabix index the resulting VCF file +keywords: + - vcf + - annotation + - snpeff +components: + - snpeff + - snpeff/snpeff + - tabix/bgziptabix +input: + - ch_vcf: + description: | + vcf file + Structure: [ val(meta), path(vcf) ] + - val_snpeff_db: + type: string + description: db version to use + - ch_snpeff_cache: + description: | + path to root cache folder for snpEff (optional) + Structure: [ path(cache) ] +output: + - vcf_tbi: + description: | + Compressed vcf file + tabix index + Structure: [ val(meta), path(vcf), path(tbi) ] + - reports: + description: | + html reports + Structure: [ path(html) ] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/tests/.nftignore b/tests/.nftignore new file mode 100644 index 00000000..559c1dfb --- /dev/null +++ b/tests/.nftignore @@ -0,0 +1,9 @@ +multiqc/multiqc_data/multiqc.log +multiqc/multiqc_data/multiqc_data.json +multiqc/multiqc_data/multiqc_general_stats.txt +multiqc/multiqc_data/multiqc_software_versions.txt +multiqc/multiqc_data/multiqc_sources.txt +multiqc/multiqc_data/picard_deduplication.txt 
+multiqc/multiqc_plots/{svg,pdf,png}/*.{svg,pdf,png} +multiqc/multiqc_report.html +pipeline_info/*.{html,json,txt,yml} diff --git a/tests/annotation.nf.test b/tests/annotation.nf.test new file mode 100644 index 00000000..ca4c0c79 --- /dev/null +++ b/tests/annotation.nf.test @@ -0,0 +1,97 @@ +nextflow_pipeline { + + name "Test pipeline | annotation" + script "../main.nf" + tag "pipeline" + tag "pipeline_rnavar" + + test("Run with profile test | annotation with snpeff") { + + when { + params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + outdir = "$outputDir" + annotate_tools = 'snpeff' + download_cache = true + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_rnavar_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name + ).match() } + ) + } + } + + test("Run with profile test | annotation with vep") { + + when { + params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + outdir = "$outputDir" + annotate_tools = 'vep' + download_cache = true + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: 
['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_rnavar_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name + ).match() } + ) + } + } + + test("Run with profile test | annotation with merge") { + + when { + params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + outdir = "$outputDir" + annotate_tools = 'merge' + download_cache = true + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_rnavar_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name + ).match() } + ) + } + } +} diff --git a/tests/annotation.nf.test.snap b/tests/annotation.nf.test.snap new file mode 100644 index 00000000..a482d28f --- /dev/null +++ b/tests/annotation.nf.test.snap @@ -0,0 +1,445 @@ +{ + "Run with profile test | annotation with merge": { + 
"content": [ + 46, + null, + [ + "annotation", + "annotation/haplotypecaller", + "annotation/haplotypecaller/GM12878", + "annotation/haplotypecaller/GM12878/GM12878.haplotypecaller.filtered_snpEff_VEP.ann.vcf.gz", + "annotation/haplotypecaller/GM12878/GM12878.haplotypecaller.filtered_snpEff_VEP.ann.vcf.gz.tbi", + "cache", + "cache/snpeff_cache", + "cache/snpeff_cache/WBcel235.105", + "cache/snpeff_cache/WBcel235.105/sequence.I.bin", + "cache/snpeff_cache/WBcel235.105/sequence.II.bin", + "cache/snpeff_cache/WBcel235.105/sequence.III.bin", + "cache/snpeff_cache/WBcel235.105/sequence.IV.bin", + "cache/snpeff_cache/WBcel235.105/sequence.V.bin", + "cache/snpeff_cache/WBcel235.105/sequence.X.bin", + "cache/snpeff_cache/WBcel235.105/sequence.bin", + "cache/snpeff_cache/WBcel235.105/snpEffectPredictor.bin", + "cache/vep_cache", + "cache/vep_cache/caenorhabditis_elegans", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/1-1000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/10000001-11000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/1000001-2000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/11000001-12000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/12000001-13000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/13000001-14000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/14000001-15000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/15000001-16000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/2000001-3000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/3000001-4000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/4000001-5000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/5000001-6000000.gz", + 
"cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/6000001-7000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/7000001-8000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/8000001-9000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/9000001-10000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/1-1000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/10000001-11000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/1000001-2000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/11000001-12000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/12000001-13000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/13000001-14000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/14000001-15000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/15000001-16000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/2000001-3000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/3000001-4000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/4000001-5000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/5000001-6000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/6000001-7000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/7000001-8000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/8000001-9000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/9000001-10000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/1-1000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/10000001-11000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/1000001-2000000.gz", + 
"cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/11000001-12000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/12000001-13000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/13000001-14000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/2000001-3000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/3000001-4000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/4000001-5000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/5000001-6000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/6000001-7000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/7000001-8000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/8000001-9000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/9000001-10000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/1-1000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/10000001-11000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/1000001-2000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/11000001-12000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/12000001-13000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/13000001-14000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/14000001-15000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/15000001-16000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/16000001-17000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/17000001-18000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/2000001-3000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/3000001-4000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/4000001-5000000.gz", + 
"cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/5000001-6000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/6000001-7000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/7000001-8000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/8000001-9000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/9000001-10000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/MtDNA", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/MtDNA/1-1000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/1-1000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/10000001-11000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/1000001-2000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/11000001-12000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/12000001-13000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/13000001-14000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/14000001-15000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/15000001-16000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/16000001-17000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/17000001-18000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/18000001-19000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/19000001-20000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/20000001-21000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/2000001-3000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/3000001-4000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/4000001-5000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/5000001-6000000.gz", + 
"cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/6000001-7000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/7000001-8000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/8000001-9000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/9000001-10000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/1-1000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/10000001-11000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/1000001-2000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/11000001-12000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/12000001-13000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/13000001-14000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/14000001-15000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/15000001-16000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/16000001-17000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/17000001-18000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/2000001-3000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/3000001-4000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/4000001-5000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/5000001-6000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/6000001-7000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/7000001-8000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/8000001-9000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/9000001-10000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/chr_synonyms.txt", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/info.txt", + "cache/versions.yml", + "pipeline_info", + 
"pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "preprocessing", + "preprocessing/GM12878", + "preprocessing/GM12878/GM12878.md.bam", + "preprocessing/GM12878/GM12878.md.bam.bai", + "preprocessing/GM12878/GM12878.recal.bam", + "preprocessing/GM12878/GM12878.recal.bam.bai", + "reports", + "reports/EnsemblVEP", + "reports/EnsemblVEP/haplotypecaller", + "reports/EnsemblVEP/haplotypecaller/GM12878", + "reports/EnsemblVEP/haplotypecaller/GM12878/GM12878.haplotypecaller.filtered_snpEff_VEP.ann.summary.html", + "reports/multiqc_data", + "reports/multiqc_data/multiqc.log", + "reports/multiqc_data/multiqc_citations.txt", + "reports/multiqc_data/multiqc_data.json", + "reports/multiqc_data/multiqc_software_versions.txt", + "reports/multiqc_data/multiqc_sources.txt", + "reports/multiqc_report.html", + "reports/snpeff", + "reports/snpeff/haplotypecaller", + "reports/snpeff/haplotypecaller/GM12878", + "reports/stats", + "reports/stats/GM12878", + "reports/stats/GM12878/GM12878.flagstat", + "reports/stats/GM12878/GM12878.stats", + "reports/stats/GM12878/STAR_log", + "reports/stats/GM12878/STAR_log/GM12878.Log.final.out", + "reports/stats/GM12878/STAR_log/GM12878.Log.out", + "reports/stats/GM12878/STAR_log/GM12878.Log.progress.out", + "reports/stats/GM12878/STAR_log/GM12878.SJ.out.tab", + "samtools", + "samtools/GM12878.bam", + "samtools/GM12878.bam.bai", + "variant_calling", + "variant_calling/GM12878", + "variant_calling/GM12878/GM12878.haplotypecaller.filtered.vcf.gz", + "variant_calling/GM12878/GM12878.haplotypecaller.filtered.vcf.gz.tbi", + "variant_calling/GM12878/GM12878.haplotypecaller.vcf.gz", + "variant_calling/GM12878/GM12878.haplotypecaller.vcf.gz.tbi", + "variant_calling/dbsnp_146.hg38.vcf", + "variant_calling/dbsnp_146.hg38.vcf/dbsnp_146.hg38.vcf.gz.tbi", + "variant_calling/mills_and_1000G.indels.vcf", + "variant_calling/mills_and_1000G.indels.vcf/mills_and_1000G.indels.vcf.gz.tbi" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, 
+ "timestamp": "2024-10-11T14:54:33.842224" + }, + "Run with profile test | annotation with vep": { + "content": [ + 43, + null, + [ + "annotation", + "annotation/haplotypecaller", + "annotation/haplotypecaller/GM12878", + "annotation/haplotypecaller/GM12878/GM12878.haplotypecaller.filtered_VEP.ann.vcf.gz", + "annotation/haplotypecaller/GM12878/GM12878.haplotypecaller.filtered_VEP.ann.vcf.gz.tbi", + "cache", + "cache/vep_cache", + "cache/vep_cache/caenorhabditis_elegans", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/1-1000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/10000001-11000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/1000001-2000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/11000001-12000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/12000001-13000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/13000001-14000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/14000001-15000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/15000001-16000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/2000001-3000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/3000001-4000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/4000001-5000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/5000001-6000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/6000001-7000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/7000001-8000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/8000001-9000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/I/9000001-10000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/1-1000000.gz", + 
"cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/10000001-11000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/1000001-2000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/11000001-12000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/12000001-13000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/13000001-14000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/14000001-15000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/15000001-16000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/2000001-3000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/3000001-4000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/4000001-5000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/5000001-6000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/6000001-7000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/7000001-8000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/8000001-9000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/II/9000001-10000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/1-1000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/10000001-11000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/1000001-2000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/11000001-12000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/12000001-13000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/13000001-14000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/2000001-3000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/3000001-4000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/4000001-5000000.gz", + 
"cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/5000001-6000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/6000001-7000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/7000001-8000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/8000001-9000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/III/9000001-10000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/1-1000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/10000001-11000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/1000001-2000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/11000001-12000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/12000001-13000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/13000001-14000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/14000001-15000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/15000001-16000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/16000001-17000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/17000001-18000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/2000001-3000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/3000001-4000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/4000001-5000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/5000001-6000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/6000001-7000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/7000001-8000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/8000001-9000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/IV/9000001-10000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/MtDNA", + 
"cache/vep_cache/caenorhabditis_elegans/110_WBcel235/MtDNA/1-1000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/1-1000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/10000001-11000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/1000001-2000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/11000001-12000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/12000001-13000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/13000001-14000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/14000001-15000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/15000001-16000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/16000001-17000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/17000001-18000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/18000001-19000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/19000001-20000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/20000001-21000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/2000001-3000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/3000001-4000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/4000001-5000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/5000001-6000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/6000001-7000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/7000001-8000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/8000001-9000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/V/9000001-10000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/1-1000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/10000001-11000000.gz", 
+ "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/1000001-2000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/11000001-12000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/12000001-13000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/13000001-14000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/14000001-15000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/15000001-16000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/16000001-17000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/17000001-18000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/2000001-3000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/3000001-4000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/4000001-5000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/5000001-6000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/6000001-7000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/7000001-8000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/8000001-9000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/X/9000001-10000000.gz", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/chr_synonyms.txt", + "cache/vep_cache/caenorhabditis_elegans/110_WBcel235/info.txt", + "cache/versions.yml", + "pipeline_info", + "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "preprocessing", + "preprocessing/GM12878", + "preprocessing/GM12878/GM12878.md.bam", + "preprocessing/GM12878/GM12878.md.bam.bai", + "preprocessing/GM12878/GM12878.recal.bam", + "preprocessing/GM12878/GM12878.recal.bam.bai", + "reports", + "reports/EnsemblVEP", + "reports/EnsemblVEP/haplotypecaller", + "reports/EnsemblVEP/haplotypecaller/GM12878", + "reports/EnsemblVEP/haplotypecaller/GM12878/GM12878.haplotypecaller.filtered_VEP.ann.summary.html", + "reports/multiqc_data", + 
"reports/multiqc_data/multiqc.log", + "reports/multiqc_data/multiqc_citations.txt", + "reports/multiqc_data/multiqc_data.json", + "reports/multiqc_data/multiqc_software_versions.txt", + "reports/multiqc_data/multiqc_sources.txt", + "reports/multiqc_report.html", + "reports/stats", + "reports/stats/GM12878", + "reports/stats/GM12878/GM12878.flagstat", + "reports/stats/GM12878/GM12878.stats", + "reports/stats/GM12878/STAR_log", + "reports/stats/GM12878/STAR_log/GM12878.Log.final.out", + "reports/stats/GM12878/STAR_log/GM12878.Log.out", + "reports/stats/GM12878/STAR_log/GM12878.Log.progress.out", + "reports/stats/GM12878/STAR_log/GM12878.SJ.out.tab", + "samtools", + "samtools/GM12878.bam", + "samtools/GM12878.bam.bai", + "variant_calling", + "variant_calling/GM12878", + "variant_calling/GM12878/GM12878.haplotypecaller.filtered.vcf.gz", + "variant_calling/GM12878/GM12878.haplotypecaller.filtered.vcf.gz.tbi", + "variant_calling/GM12878/GM12878.haplotypecaller.vcf.gz", + "variant_calling/GM12878/GM12878.haplotypecaller.vcf.gz.tbi", + "variant_calling/dbsnp_146.hg38.vcf", + "variant_calling/dbsnp_146.hg38.vcf/dbsnp_146.hg38.vcf.gz.tbi", + "variant_calling/mills_and_1000G.indels.vcf", + "variant_calling/mills_and_1000G.indels.vcf/mills_and_1000G.indels.vcf.gz.tbi" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-11T14:49:38.621181" + }, + "Run with profile test | annotation with snpeff": { + "content": [ + 43, + null, + [ + "annotation", + "annotation/haplotypecaller", + "annotation/haplotypecaller/GM12878", + "annotation/haplotypecaller/GM12878/GM12878.haplotypecaller.filtered_snpEff.ann.vcf.gz", + "annotation/haplotypecaller/GM12878/GM12878.haplotypecaller.filtered_snpEff.ann.vcf.gz.tbi", + "cache", + "cache/snpeff_cache", + "cache/snpeff_cache/WBcel235.105", + "cache/snpeff_cache/WBcel235.105/sequence.I.bin", + "cache/snpeff_cache/WBcel235.105/sequence.II.bin", + "cache/snpeff_cache/WBcel235.105/sequence.III.bin", + 
"cache/snpeff_cache/WBcel235.105/sequence.IV.bin", + "cache/snpeff_cache/WBcel235.105/sequence.V.bin", + "cache/snpeff_cache/WBcel235.105/sequence.X.bin", + "cache/snpeff_cache/WBcel235.105/sequence.bin", + "cache/snpeff_cache/WBcel235.105/snpEffectPredictor.bin", + "cache/versions.yml", + "pipeline_info", + "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "preprocessing", + "preprocessing/GM12878", + "preprocessing/GM12878/GM12878.md.bam", + "preprocessing/GM12878/GM12878.md.bam.bai", + "preprocessing/GM12878/GM12878.recal.bam", + "preprocessing/GM12878/GM12878.recal.bam.bai", + "reports", + "reports/multiqc_data", + "reports/multiqc_data/multiqc.log", + "reports/multiqc_data/multiqc_citations.txt", + "reports/multiqc_data/multiqc_data.json", + "reports/multiqc_data/multiqc_software_versions.txt", + "reports/multiqc_data/multiqc_sources.txt", + "reports/multiqc_report.html", + "reports/snpeff", + "reports/snpeff/haplotypecaller", + "reports/snpeff/haplotypecaller/GM12878", + "reports/snpeff/haplotypecaller/GM12878/GM12878.haplotypecaller.filtered_snpEff.csv", + "reports/snpeff/haplotypecaller/GM12878/GM12878.haplotypecaller.filtered_snpEff.genes.txt", + "reports/snpeff/haplotypecaller/GM12878/snpEff_summary.html", + "reports/stats", + "reports/stats/GM12878", + "reports/stats/GM12878/GM12878.flagstat", + "reports/stats/GM12878/GM12878.stats", + "reports/stats/GM12878/STAR_log", + "reports/stats/GM12878/STAR_log/GM12878.Log.final.out", + "reports/stats/GM12878/STAR_log/GM12878.Log.out", + "reports/stats/GM12878/STAR_log/GM12878.Log.progress.out", + "reports/stats/GM12878/STAR_log/GM12878.SJ.out.tab", + "samtools", + "samtools/GM12878.bam", + "samtools/GM12878.bam.bai", + "variant_calling", + "variant_calling/GM12878", + "variant_calling/GM12878/GM12878.haplotypecaller.filtered.vcf.gz", + "variant_calling/GM12878/GM12878.haplotypecaller.filtered.vcf.gz.tbi", + "variant_calling/GM12878/GM12878.haplotypecaller.vcf.gz", + 
"variant_calling/GM12878/GM12878.haplotypecaller.vcf.gz.tbi", + "variant_calling/dbsnp_146.hg38.vcf", + "variant_calling/dbsnp_146.hg38.vcf/dbsnp_146.hg38.vcf.gz.tbi", + "variant_calling/mills_and_1000G.indels.vcf", + "variant_calling/mills_and_1000G.indels.vcf/mills_and_1000G.indels.vcf.gz.tbi" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-11T14:46:10.437216" + } +} \ No newline at end of file diff --git a/tests/bam_csi.nf.test b/tests/bam_csi.nf.test new file mode 100644 index 00000000..64ea8f58 --- /dev/null +++ b/tests/bam_csi.nf.test @@ -0,0 +1,36 @@ +nextflow_pipeline { + + name "Test pipeline | bam_csi_index" + script "../main.nf" + tag "pipeline" + tag "pipeline_rnavar" + + test("Run with profile test | bam_csi_index") { + + when { + params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + outdir = "$outputDir" + bam_csi_index = true + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_rnavar_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name + ).match() } + ) + } + } +} diff --git a/tests/bam_csi.nf.test.snap b/tests/bam_csi.nf.test.snap new file mode 100644 index 00000000..8958e3dc --- /dev/null +++ b/tests/bam_csi.nf.test.snap @@ -0,0 +1,51 @@ +{ + "Run with 
profile test | bam_csi_index": { + "content": [ + 39, + null, + [ + "pipeline_info", + "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "preprocessing", + "preprocessing/GM12878", + "preprocessing/GM12878/GM12878.md.bam", + "preprocessing/GM12878/GM12878.md.bam.csi", + "preprocessing/GM12878/GM12878.recal.bam", + "preprocessing/GM12878/GM12878.recal.bam.csi", + "reports", + "reports/multiqc_data", + "reports/multiqc_data/multiqc.log", + "reports/multiqc_data/multiqc_citations.txt", + "reports/multiqc_data/multiqc_data.json", + "reports/multiqc_data/multiqc_software_versions.txt", + "reports/multiqc_data/multiqc_sources.txt", + "reports/multiqc_report.html", + "reports/stats", + "reports/stats/GM12878", + "reports/stats/GM12878/GM12878.flagstat", + "reports/stats/GM12878/GM12878.stats", + "reports/stats/GM12878/STAR_log", + "reports/stats/GM12878/STAR_log/GM12878.Log.final.out", + "reports/stats/GM12878/STAR_log/GM12878.Log.out", + "reports/stats/GM12878/STAR_log/GM12878.Log.progress.out", + "reports/stats/GM12878/STAR_log/GM12878.SJ.out.tab", + "samtools", + "samtools/GM12878.bam", + "samtools/GM12878.bam.bai", + "variant_calling", + "variant_calling/GM12878", + "variant_calling/GM12878/GM12878.haplotypecaller.vcf.gz", + "variant_calling/GM12878/GM12878.haplotypecaller.vcf.gz.csi", + "variant_calling/dbsnp_146.hg38.vcf", + "variant_calling/dbsnp_146.hg38.vcf/dbsnp_146.hg38.vcf.gz.csi", + "variant_calling/mills_and_1000G.indels.vcf", + "variant_calling/mills_and_1000G.indels.vcf/mills_and_1000G.indels.vcf.gz.csi" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-11T14:57:45.737274" + } +} \ No newline at end of file diff --git a/tests/csv/1.0/fastq_single.csv b/tests/csv/1.0/fastq_single.csv new file mode 100644 index 00000000..125f9bfd --- /dev/null +++ b/tests/csv/1.0/fastq_single.csv @@ -0,0 +1,2 @@ +sample,fastq_1,fastq_2,strandedness 
+GM12878,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz,https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz,reverse diff --git a/tests/default.nf.test b/tests/default.nf.test new file mode 100644 index 00000000..18825af0 --- /dev/null +++ b/tests/default.nf.test @@ -0,0 +1,35 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + tag "pipeline" + tag "pipeline_rnavar" + + test("Run with profile test") { + + when { + params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_rnavar_software_mqc_versions.yml"), + // All stable path names, with relative paths + stable_name + ).match() } + ) + } + } +} diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap new file mode 100644 index 00000000..b1161f20 --- /dev/null +++ b/tests/default.nf.test.snap @@ -0,0 +1,53 @@ +{ + "Run with profile test": { + "content": [ + 40, + null, + [ + "pipeline_info", + "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "preprocessing", + "preprocessing/GM12878", + "preprocessing/GM12878/GM12878.md.bam", +
"preprocessing/GM12878/GM12878.md.bam.bai", + "preprocessing/GM12878/GM12878.recal.bam", + "preprocessing/GM12878/GM12878.recal.bam.bai", + "reports", + "reports/multiqc_data", + "reports/multiqc_data/multiqc.log", + "reports/multiqc_data/multiqc_citations.txt", + "reports/multiqc_data/multiqc_data.json", + "reports/multiqc_data/multiqc_software_versions.txt", + "reports/multiqc_data/multiqc_sources.txt", + "reports/multiqc_report.html", + "reports/stats", + "reports/stats/GM12878", + "reports/stats/GM12878/GM12878.flagstat", + "reports/stats/GM12878/GM12878.stats", + "reports/stats/GM12878/STAR_log", + "reports/stats/GM12878/STAR_log/GM12878.Log.final.out", + "reports/stats/GM12878/STAR_log/GM12878.Log.out", + "reports/stats/GM12878/STAR_log/GM12878.Log.progress.out", + "reports/stats/GM12878/STAR_log/GM12878.SJ.out.tab", + "samtools", + "samtools/GM12878.bam", + "samtools/GM12878.bam.bai", + "variant_calling", + "variant_calling/GM12878", + "variant_calling/GM12878/GM12878.haplotypecaller.filtered.vcf.gz", + "variant_calling/GM12878/GM12878.haplotypecaller.filtered.vcf.gz.tbi", + "variant_calling/GM12878/GM12878.haplotypecaller.vcf.gz", + "variant_calling/GM12878/GM12878.haplotypecaller.vcf.gz.tbi", + "variant_calling/dbsnp_146.hg38.vcf", + "variant_calling/dbsnp_146.hg38.vcf/dbsnp_146.hg38.vcf.gz.tbi", + "variant_calling/mills_and_1000G.indels.vcf", + "variant_calling/mills_and_1000G.indels.vcf/mills_and_1000G.indels.vcf.gz.tbi" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-11T15:01:04.962148" + } +} \ No newline at end of file diff --git a/tests/removeduplicates.nf.test b/tests/removeduplicates.nf.test new file mode 100644 index 00000000..bac8205b --- /dev/null +++ b/tests/removeduplicates.nf.test @@ -0,0 +1,36 @@ +nextflow_pipeline { + + name "Test pipeline | remove_duplicates" + script "../main.nf" + tag "pipeline" + tag "pipeline_rnavar" + + test("Run with profile test | remove_duplicates") { + + when { + 
params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + outdir = "$outputDir" + remove_duplicates = true + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_rnavar_software_mqc_versions.yml"), + // All stable path names, with relative paths + stable_name + ).match() } + ) + } + } +} diff --git a/tests/removeduplicates.nf.test.snap b/tests/removeduplicates.nf.test.snap new file mode 100644 index 00000000..6011aff2 --- /dev/null +++ b/tests/removeduplicates.nf.test.snap @@ -0,0 +1,53 @@ +{ + "Run with profile test | remove_duplicates": { + "content": [ + 40, + null, + [ + "pipeline_info", + "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "preprocessing", + "preprocessing/GM12878", + "preprocessing/GM12878/GM12878.md.bam", + "preprocessing/GM12878/GM12878.md.bam.bai", + "preprocessing/GM12878/GM12878.recal.bam", + "preprocessing/GM12878/GM12878.recal.bam.bai", + "reports", + "reports/multiqc_data", + "reports/multiqc_data/multiqc.log", + "reports/multiqc_data/multiqc_citations.txt", + "reports/multiqc_data/multiqc_data.json", + "reports/multiqc_data/multiqc_software_versions.txt", + "reports/multiqc_data/multiqc_sources.txt", + "reports/multiqc_report.html", + "reports/stats", + "reports/stats/GM12878", +
"reports/stats/GM12878/GM12878.flagstat", + "reports/stats/GM12878/GM12878.stats", + "reports/stats/GM12878/STAR_log", + "reports/stats/GM12878/STAR_log/GM12878.Log.final.out", + "reports/stats/GM12878/STAR_log/GM12878.Log.out", + "reports/stats/GM12878/STAR_log/GM12878.Log.progress.out", + "reports/stats/GM12878/STAR_log/GM12878.SJ.out.tab", + "samtools", + "samtools/GM12878.bam", + "samtools/GM12878.bam.bai", + "variant_calling", + "variant_calling/GM12878", + "variant_calling/GM12878/GM12878.haplotypecaller.filtered.vcf.gz", + "variant_calling/GM12878/GM12878.haplotypecaller.filtered.vcf.gz.tbi", + "variant_calling/GM12878/GM12878.haplotypecaller.vcf.gz", + "variant_calling/GM12878/GM12878.haplotypecaller.vcf.gz.tbi", + "variant_calling/dbsnp_146.hg38.vcf", + "variant_calling/dbsnp_146.hg38.vcf/dbsnp_146.hg38.vcf.gz.tbi", + "variant_calling/mills_and_1000G.indels.vcf", + "variant_calling/mills_and_1000G.indels.vcf/mills_and_1000G.indels.vcf.gz.tbi" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-11T15:05:33.998547" + } +} \ No newline at end of file diff --git a/tests/skip_baserecalibration.nf.test b/tests/skip_baserecalibration.nf.test new file mode 100644 index 00000000..78982da0 --- /dev/null +++ b/tests/skip_baserecalibration.nf.test @@ -0,0 +1,36 @@ +nextflow_pipeline { + + name "Test pipeline | skip_baserecalibration" + script "../main.nf" + tag "pipeline" + tag "pipeline_rnavar" + + test("Run with profile test | skip_baserecalibration") { + + when { + params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + outdir = "$outputDir" + skip_baserecalibration = true + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable 
content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_rnavar_software_mqc_versions.yml"), + // All stable path names, with relative paths + stable_name + ).match() } + ) + } + } +} diff --git a/tests/skip_baserecalibration.nf.test.snap b/tests/skip_baserecalibration.nf.test.snap new file mode 100644 index 00000000..2b3cefbe --- /dev/null +++ b/tests/skip_baserecalibration.nf.test.snap @@ -0,0 +1,51 @@ +{ + "Run with profile test | skip_baserecalibration": { + "content": [ + 36, + null, + [ + "pipeline_info", + "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "preprocessing", + "preprocessing/GM12878", + "preprocessing/GM12878/GM12878.md.bam", + "preprocessing/GM12878/GM12878.md.bam.bai", + "reports", + "reports/multiqc_data", + "reports/multiqc_data/multiqc.log", + "reports/multiqc_data/multiqc_citations.txt", + "reports/multiqc_data/multiqc_data.json", + "reports/multiqc_data/multiqc_software_versions.txt", + "reports/multiqc_data/multiqc_sources.txt", + "reports/multiqc_report.html", + "reports/stats", + "reports/stats/GM12878", + "reports/stats/GM12878/GM12878.flagstat", + "reports/stats/GM12878/GM12878.stats", + "reports/stats/GM12878/STAR_log", + "reports/stats/GM12878/STAR_log/GM12878.Log.final.out", + "reports/stats/GM12878/STAR_log/GM12878.Log.out", + "reports/stats/GM12878/STAR_log/GM12878.Log.progress.out", + "reports/stats/GM12878/STAR_log/GM12878.SJ.out.tab", + "samtools", + "samtools/GM12878.bam", + "samtools/GM12878.bam.bai", + "variant_calling", + "variant_calling/GM12878", + "variant_calling/GM12878/GM12878.haplotypecaller.filtered.vcf.gz", +
"variant_calling/GM12878/GM12878.haplotypecaller.filtered.vcf.gz.tbi", + "variant_calling/GM12878/GM12878.haplotypecaller.vcf.gz", + "variant_calling/GM12878/GM12878.haplotypecaller.vcf.gz.tbi", + "variant_calling/dbsnp_146.hg38.vcf", + "variant_calling/dbsnp_146.hg38.vcf/dbsnp_146.hg38.vcf.gz.tbi", + "variant_calling/mills_and_1000G.indels.vcf", + "variant_calling/mills_and_1000G.indels.vcf/mills_and_1000G.indels.vcf.gz.tbi" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-11T15:10:25.387052" + } +} \ No newline at end of file diff --git a/workflows/rnavar.nf b/workflows/rnavar.nf deleted file mode 100644 index c5dcbe5f..00000000 --- a/workflows/rnavar.nf +++ /dev/null @@ -1,97 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_rnavar_pipeline' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow RNAVAR { - - take: - ch_samplesheet // channel: samplesheet read in from --input - main: - - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() - // - // MODULE: Run FastQC - // - FASTQC ( - ch_samplesheet - ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) 
- - // - // Collate and save software versions - // - softwareVersionsToYAML(ch_versions) - .collectFile( - storeDir: "${params.outdir}/pipeline_info", - name: 'nf_core_' + 'pipeline_software_' + 'mqc_' + 'versions.yml', - sort: true, - newLine: true - ).set { ch_collated_versions } - - - // - // MODULE: MultiQC - // - ch_multiqc_config = Channel.fromPath( - "$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? - Channel.fromPath(params.multiqc_config, checkIfExists: true) : - Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? - Channel.fromPath(params.multiqc_logo, checkIfExists: true) : - Channel.empty() - - summary_params = paramsSummaryMap( - workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_files = ch_multiqc_files.mix( - ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
- file(params.multiqc_methods_description, checkIfExists: true) : - file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value( - methodsDescriptionText(ch_multiqc_custom_methods_description)) - - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix( - ch_methods_description.collectFile( - name: 'methods_description_mqc.yaml', - sort: true - ) - ) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList(), - [], - [] - ) - - emit:multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] - -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ diff --git a/workflows/rnavar/main.nf b/workflows/rnavar/main.nf new file mode 100755 index 00000000..3deaa578 --- /dev/null +++ b/workflows/rnavar/main.nf @@ -0,0 +1,445 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// local +include { GTF2BED } from '../../modules/local/gtf2bed' + +// nf-core +include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq' +include { FASTQC } from '../../modules/nf-core/fastqc' +include { GATK4_BASERECALIBRATOR } from '../../modules/nf-core/gatk4/baserecalibrator' +include { GATK4_BEDTOINTERVALLIST } from '../../modules/nf-core/gatk4/bedtointervallist' +include { GATK4_COMBINEGVCFS } from '../../modules/nf-core/gatk4/combinegvcfs' +include { GATK4_HAPLOTYPECALLER } from '../../modules/nf-core/gatk4/haplotypecaller' +include { GATK4_INDEXFEATUREFILE } from 
'../../modules/nf-core/gatk4/indexfeaturefile' +include { GATK4_INTERVALLISTTOOLS } from '../../modules/nf-core/gatk4/intervallisttools' +include { GATK4_MERGEVCFS } from '../../modules/nf-core/gatk4/mergevcfs' +include { GATK4_VARIANTFILTRATION } from '../../modules/nf-core/gatk4/variantfiltration' +include { MULTIQC } from '../../modules/nf-core/multiqc' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index' +include { TABIX_TABIX as TABIX } from '../../modules/nf-core/tabix/tabix' +include { TABIX_TABIX as TABIXGVCF } from '../../modules/nf-core/tabix/tabix' + +// local +include { RECALIBRATE } from '../../subworkflows/local/recalibrate' +include { SPLITNCIGAR } from '../../subworkflows/local/splitncigar' +include { VCF_ANNOTATE_ALL } from '../../subworkflows/local/vcf_annotate_all' + +// nf-core +include { BAM_MARKDUPLICATES_PICARD } from '../../subworkflows/nf-core/bam_markduplicates_picard' +include { FASTQ_ALIGN_STAR } from '../../subworkflows/nf-core/fastq_align_star' + +// local +include { checkSamplesAfterGrouping } from '../../subworkflows/local/utils_nfcore_rnavar_pipeline' +include { methodsDescriptionText } from '../../subworkflows/local/utils_nfcore_rnavar_pipeline' + +// nf-core +include { paramsSummaryMultiqc } from '../../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline' + +// plugin +include { paramsSummaryMap } from 'plugin/nf-schema' + +/* +======================================================================================== + RUN MAIN WORKFLOW RNAVAR +======================================================================================== +*/ + +workflow RNAVAR { + take: + ch_input + ch_dbsnp + ch_dbsnp_tbi + ch_dict + ch_exon_bed + ch_fasta + ch_fasta_fai + ch_gtf + ch_known_indels + ch_known_indels_tbi + ch_star_index + snpeff_cache + vep_genome + vep_species + vep_cache_version + vep_cache + vep_extra_files + seq_center + seq_platform + 
+ main: + + // To gather all QC reports for MultiQC + ch_reports = Channel.empty() + + // To gather used softwares versions for MultiQC + ch_versions = Channel.empty() + + // MODULE: Concatenate FastQ files from same sample if required + ch_fastq = ch_input.groupTuple().map{ samplesheet -> checkSamplesAfterGrouping(samplesheet) } + .branch{ meta, fastqs -> + single : fastqs.size() == 1 + return [ meta, fastqs.flatten() ] + multiple: fastqs.size() > 1 + return [ meta, fastqs.flatten() ] + } + + CAT_FASTQ(ch_fastq.multiple) + + ch_cat_fastq = CAT_FASTQ.out.reads.mix(ch_fastq.single) + + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) + + // MODULE: Generate QC summary using FastQC + FASTQC(ch_cat_fastq) + ch_reports = ch_reports.mix(FASTQC.out.zip.collect{ meta, logs -> logs }) + ch_versions = ch_versions.mix(FASTQC.out.versions) + + // + // MODULE: Prepare the interval list from the GTF file using GATK4 BedToIntervalList + // + + GATK4_BEDTOINTERVALLIST(ch_exon_bed, ch_dict) + ch_interval_list = GATK4_BEDTOINTERVALLIST.out.interval_list + ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST.out.versions) + + // + // MODULE: Scatter one interval-list into many interval-files using GATK4 IntervalListTools + // + ch_interval_list_split = Channel.empty() + if (!params.skip_intervallisttools) { + GATK4_INTERVALLISTTOOLS(ch_interval_list) + ch_interval_list_split = GATK4_INTERVALLISTTOOLS.out.interval_list.map{ meta, bed -> [bed] }.flatten() + } + else ch_interval_list_split = ch_interval_list + + // + // SUBWORKFLOW: Perform read alignment using STAR aligner + // + ch_genome_bam = Channel.empty() + ch_genome_bam_index = Channel.empty() + ch_samtools_stats = Channel.empty() + ch_samtools_flagstat = Channel.empty() + ch_samtools_idxstats = Channel.empty() + ch_star_multiqc = Channel.empty() + ch_aligner_pca_multiqc = Channel.empty() + ch_aligner_clustering_multiqc = Channel.empty() + + if (params.aligner == 'star') { + FASTQ_ALIGN_STAR(ch_cat_fastq, + ch_star_index, 
+ ch_gtf, + params.star_ignore_sjdbgtf, + seq_platform, + seq_center, + ch_fasta, + [[:],[]]) //ch_transcripts_fasta) + + ch_genome_bam = FASTQ_ALIGN_STAR.out.bam + ch_genome_bam_index = FASTQ_ALIGN_STAR.out.bai + ch_transcriptome_bam = FASTQ_ALIGN_STAR.out.bam_transcript + + // Gather QC reports + ch_reports = ch_reports.mix(FASTQ_ALIGN_STAR.out.log_out) + ch_reports = ch_reports.mix(FASTQ_ALIGN_STAR.out.log_final.collect{it[1]}.ifEmpty([])) + ch_versions = ch_versions.mix(FASTQ_ALIGN_STAR.out.versions) + + // + // SUBWORKFLOW: Mark duplicates with GATK4 + // + BAM_MARKDUPLICATES_PICARD(ch_genome_bam, + ch_fasta, + ch_fasta_fai.map{ it -> [[id:'genome'], it] }) + + ch_genome_bam_bai = BAM_MARKDUPLICATES_PICARD.out.bam + .join(BAM_MARKDUPLICATES_PICARD.out.bai, remainder: true) + .join(BAM_MARKDUPLICATES_PICARD.out.csi, remainder: true) + .map{meta, bam, bai, csi -> + if (bai) [meta, bam, bai] + else [meta, bam, csi] + } + + //Gather QC reports + ch_reports = ch_reports.mix(BAM_MARKDUPLICATES_PICARD.out.metrics.collect{it[1]}.ifEmpty([])) + ch_reports = ch_reports.mix(BAM_MARKDUPLICATES_PICARD.out.stats.collect{it[1]}.ifEmpty([])) + ch_reports = ch_reports.mix(BAM_MARKDUPLICATES_PICARD.out.flagstat.collect{it[1]}.ifEmpty([])) + ch_reports = ch_reports.mix(BAM_MARKDUPLICATES_PICARD.out.idxstats.collect{it[1]}.ifEmpty([])) + ch_versions = ch_versions.mix(BAM_MARKDUPLICATES_PICARD.out.versions) + + // + // SUBWORKFLOW: SplitNCigarReads from GATK4 over the intervals + // Splits reads that contain Ns in their cigar string(e.g. spanning splicing events in RNAseq data). 
+ // + + SPLITNCIGAR(ch_genome_bam_bai, + ch_fasta, + ch_fasta_fai, + ch_dict, + ch_interval_list_split + ) + + ch_splitncigar_bam_bai = SPLITNCIGAR.out.bam_bai + ch_versions = ch_versions.mix(SPLITNCIGAR.out.versions) + + // + // MODULE: BaseRecalibrator from GATK4 + // Generates a recalibration table based on various co-variates + // + ch_bam_variant_calling = Channel.empty() + + if (!params.skip_baserecalibration) { + ch_bqsr_table = Channel.empty() + // known_sites is made by grouping both the dbsnp and the known indels ressources + // they can either or both be optional + ch_known_sites = ch_dbsnp.concat(ch_known_indels).collect() + ch_known_sites_tbi = ch_dbsnp_tbi.concat(ch_known_indels_tbi).collect() + + ch_interval_list_recalib = ch_interval_list.map{ meta, bed -> [bed] }.flatten() + ch_splitncigar_bam_bai_interval = ch_splitncigar_bam_bai.combine(ch_interval_list_recalib) + .map{ meta, bam, bai, interval -> [ meta, bam, bai, interval] } + + GATK4_BASERECALIBRATOR( + ch_splitncigar_bam_bai_interval, + ch_fasta.map{ meta, fasta -> [fasta] }, + ch_fasta_fai, + ch_dict.map{ meta, dict -> [dict] }, + ch_known_sites, + ch_known_sites_tbi + ) + ch_bqsr_table = GATK4_BASERECALIBRATOR.out.table + + // Gather QC reports + ch_reports = ch_reports.mix(ch_bqsr_table.map{ meta, table -> table}) + ch_versions = ch_versions.mix(GATK4_BASERECALIBRATOR.out.versions) + + ch_bam_applybqsr = ch_splitncigar_bam_bai.join(ch_bqsr_table) + ch_bam_recalibrated_qc = Channel.empty() + + ch_interval_list_applybqsr = ch_interval_list.map{ meta, bed -> [bed] }.flatten() + ch_bam_applybqsr.combine(ch_interval_list_applybqsr) + .map{ meta, bam, bai, table, interval -> [ meta, bam, bai, table, interval]} + .set{ch_applybqsr_bam_bai_interval} + + // + // MODULE: ApplyBaseRecalibrator from GATK4 + // Recalibrates the base qualities of the input reads based on the recalibration table produced by the GATK BaseRecalibrator tool. 
+ // + RECALIBRATE( + params.skip_multiqc, + ch_applybqsr_bam_bai_interval, + ch_dict.map{ meta, dict -> [dict] }, + ch_fasta_fai, + ch_fasta.map{ meta, fasta -> [fasta] } + ) + + ch_bam_variant_calling = RECALIBRATE.out.bam + ch_bam_recalibrated_qc = RECALIBRATE.out.qc + + // Gather QC reports + ch_reports = ch_reports.mix(RECALIBRATE.out.qc.collect{it[1]}.ifEmpty([])) + ch_versions = ch_versions.mix(RECALIBRATE.out.versions) + } else { + ch_bam_variant_calling = ch_splitncigar_bam_bai + } + + interval_flag = params.no_intervals + // Run haplotyper even in the absence of dbSNP files + if (!params.dbsnp){ + ch_dbsnp_for_haplotypecaller = [[id:'null'], []] + ch_dbsnp_for_haplotypecaller_tbi = [[id:'null'], []] + } else { + ch_dbsnp_for_haplotypecaller = ch_dbsnp.map{ vcf -> [[id:'dbsnp'], vcf] } + ch_dbsnp_for_haplotypecaller_tbi = ch_dbsnp_tbi.map{ tbi -> [[id:'dbsnp'], tbi] } + } + + ch_haplotypecaller_vcf = Channel.empty() + ch_haplotypecaller_interval_bam = ch_bam_variant_calling.combine(ch_interval_list_split) + .map{ meta, bam, bai, interval_list -> + [ meta + [ id:meta.id + "_" + interval_list.baseName, sample:meta.id, variantcaller:'haplotypecaller' ], bam, bai, interval_list, [] ] + } + + // + // MODULE: HaplotypeCaller from GATK4 + // Calls germline SNPs and indels via local re-assembly of haplotypes. 
+ // + + GATK4_HAPLOTYPECALLER( + ch_haplotypecaller_interval_bam, + ch_fasta, + ch_fasta_fai.map{ it -> [[id:it.baseName], it] }, + ch_dict, + ch_dbsnp_for_haplotypecaller, + ch_dbsnp_for_haplotypecaller_tbi + ) + + ch_haplotypecaller_raw = GATK4_HAPLOTYPECALLER.out.vcf.map{ meta, vcf -> [ meta + [id:meta.sample] - meta.subMap('sample'), vcf ] }.groupTuple() + + ch_versions = ch_versions.mix(GATK4_HAPLOTYPECALLER.out.versions) + + if (!params.generate_gvcf){ + // + // MODULE: MergeVCFS from GATK4 + // Merge multiple VCF files into one VCF + // + GATK4_MERGEVCFS( + ch_haplotypecaller_raw, + ch_dict + ) + ch_haplotypecaller_vcf = GATK4_MERGEVCFS.out.vcf + ch_versions = ch_versions.mix(GATK4_MERGEVCFS.out.versions) + + // + // MODULE: Index the VCF using TABIX + // + TABIX( + ch_haplotypecaller_vcf + ) + + ch_haplotypecaller_vcf_tbi = ch_haplotypecaller_vcf + .join(TABIX.out.tbi, by: [0], remainder: true) + .join(TABIX.out.csi, by: [0], remainder: true) + .map{meta, vcf, tbi, csi -> + if (tbi) [meta, vcf, tbi] + else [meta, vcf, csi] + } + + ch_versions = ch_versions.mix(TABIX.out.versions) + ch_final_vcf = ch_haplotypecaller_vcf + + // + // MODULE: VariantFiltration from GATK4 + // Filter variant calls based on certain criteria + // + if (!params.skip_variantfiltration && !params.bam_csi_index ) { + + GATK4_VARIANTFILTRATION( + ch_haplotypecaller_vcf_tbi, + ch_fasta, + ch_fasta_fai.map{ fasta_fai -> [[id:'genome'], fasta_fai]}, + ch_dict + ) + + ch_filtered_vcf = GATK4_VARIANTFILTRATION.out.vcf + ch_final_vcf = ch_filtered_vcf + ch_versions = ch_versions.mix(GATK4_VARIANTFILTRATION.out.versions) + } + + // + // SUBWORKFLOW: Annotate variants using snpEff and Ensembl VEP if enabled. + // + if((!params.skip_variantannotation) && (params.annotate_tools) && (params.annotate_tools.contains('merge') || params.annotate_tools.contains('snpeff') || params.annotate_tools.contains('vep'))) { + + vep_fasta = (params.vep_include_fasta) ? 
fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] } : [[id: 'null'], []] + + VCF_ANNOTATE_ALL( + ch_final_vcf.map{meta, vcf -> [ meta + [ file_name: vcf.baseName ], vcf ] }, + vep_fasta, + params.annotate_tools, + params.snpeff_genome ? "${params.snpeff_genome}.${params.snpeff_db}" : "${params.genome}.${params.snpeff_db}", + snpeff_cache, + vep_genome, + vep_species, + vep_cache_version, + vep_cache, + vep_extra_files, + [], // bcftools_annotations, + [], //bcftools_annotations_tbi, + []) //bcftools_header_lines) + + // Gather QC reports + ch_reports = ch_reports.mix(VCF_ANNOTATE_ALL.out.reports) + ch_versions = ch_versions.mix(VCF_ANNOTATE_ALL.out.versions) + } + + } + else{ + ch_haplotypecaller_raw_index = GATK4_HAPLOTYPECALLER.out.tbi + .map{ meta, idx -> + meta.id = meta.sample + [meta, idx]} + .groupTuple() + + // + // MODULE: CombineGVCFS from GATK4 + // Merge multiple GVCF files into one GVCF + // + GATK4_COMBINEGVCFS( + ch_haplotypecaller_raw, + ch_haplotypecaller_raw_index, + ch_fasta, + ch_fai, + ch_dict + ) + ch_haplotypecaller_gvcf = GATK4_COMBINEGVCFS.out.combined_gvcf + ch_versions = ch_versions.mix(GATK4_COMBINEGVCFS.out.versions) + + // + // MODULE: Index the VCF using TABIX + // + TABIXGVCF(ch_haplotypecaller_gvcf) + + ch_haplotypecaller_gvcf_tbi = ch_haplotypecaller_gvcf + .join(TABIXGVCF.out.tbi, by: [0], remainder: true) + .join(TABIXGVCF.out.csi, by: [0], remainder: true) + .map{meta, vcf, tbi, csi -> + if (tbi) [meta, vcf, tbi] + else [meta, vcf, csi] + } + + ch_versions = ch_versions.mix(TABIXGVCF.out.versions) + + } + } + + // + // Collate and save software versions + // + ch_collated_versions = softwareVersionsToYAML(ch_versions).collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true) + + // + // MODULE: MultiQC + // Present summary of reads, alignment, duplicates, BSQR stats for all samples as well as workflow summary/parameters as single report + // + 
multiqc_report = Channel.empty() + + if (!params.skip_multiqc){ + ch_multiqc_files = Channel.empty() + + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: true)) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList(), + [], + [] + ) + multiqc_report = MULTIQC.out.report.toList() + ch_versions = ch_versions.mix(MULTIQC.out.versions) + } + + emit: + multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/