From 3b50902113f5e6abadb58f1e7c850c6ace51f54e Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 13 Nov 2023 12:38:33 +0100 Subject: [PATCH 001/104] bump version to 2.8.0dev --- assets/multiqc_config.yml | 4 ++-- nextflow.config | 2 +- tests/pipeline/doubleprimers.nf.test.snap | 2 +- tests/pipeline/fasta.nf.test.snap | 2 +- tests/pipeline/iontorrent.nf.test.snap | 2 +- tests/pipeline/multi.nf.test.snap | 2 +- tests/pipeline/novaseq.nf.test.snap | 2 +- tests/pipeline/pacbio_its.nf.test.snap | 2 +- tests/pipeline/pplace.nf.test.snap | 2 +- tests/pipeline/reftaxcustom.nf.test.snap | 2 +- tests/pipeline/single.nf.test.snap | 2 +- tests/pipeline/sintax.nf.test.snap | 2 +- tests/pipeline/test.nf.test.snap | 2 +- 13 files changed, 14 insertions(+), 14 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 4fd6b6ea..64df13db 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/ampliseq + This report has been generated by the nf-core/ampliseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. 
report_section_order: "nf-core-ampliseq-methods-description": order: -1000 diff --git a/nextflow.config b/nextflow.config index 93e19e86..5139e0ec 100644 --- a/nextflow.config +++ b/nextflow.config @@ -329,7 +329,7 @@ manifest { description = """Amplicon sequencing analysis workflow using DADA2 and QIIME2""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.7.1' + version = '2.8.0dev' doi = '10.5281/zenodo.1493841' } diff --git a/tests/pipeline/doubleprimers.nf.test.snap b/tests/pipeline/doubleprimers.nf.test.snap index 26ffdc7a..d7cc9dce 100644 --- a/tests/pipeline/doubleprimers.nf.test.snap +++ b/tests/pipeline/doubleprimers.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-07-27T13:49:03+0000" }, diff --git a/tests/pipeline/fasta.nf.test.snap b/tests/pipeline/fasta.nf.test.snap index bde435d9..6350712f 100644 --- a/tests/pipeline/fasta.nf.test.snap +++ b/tests/pipeline/fasta.nf.test.snap 
@@ -7,7 +7,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:06:17+0000" }, diff --git a/tests/pipeline/iontorrent.nf.test.snap b/tests/pipeline/iontorrent.nf.test.snap index 70bdf615..420b3dd6 100644 --- a/tests/pipeline/iontorrent.nf.test.snap +++ b/tests/pipeline/iontorrent.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T01:42:35+0000" }, diff --git a/tests/pipeline/multi.nf.test.snap b/tests/pipeline/multi.nf.test.snap index 10a5f78e..2c0382f0 100644 --- a/tests/pipeline/multi.nf.test.snap +++ b/tests/pipeline/multi.nf.test.snap @@ -14,7 +14,7 @@ }, 
"software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:15:03+0000" }, diff --git a/tests/pipeline/novaseq.nf.test.snap b/tests/pipeline/novaseq.nf.test.snap index 89bf199b..427cd40b 100644 --- a/tests/pipeline/novaseq.nf.test.snap +++ b/tests/pipeline/novaseq.nf.test.snap @@ -7,7 +7,7 @@ }, "software_versions": { "content": [ - "{CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T00:10:02+0000" }, diff --git 
a/tests/pipeline/pacbio_its.nf.test.snap b/tests/pipeline/pacbio_its.nf.test.snap index 0adfdad8..c211e2b6 100644 --- a/tests/pipeline/pacbio_its.nf.test.snap +++ b/tests/pipeline/pacbio_its.nf.test.snap @@ -35,7 +35,7 @@ }, "software_versions": { "content": [ - "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T02:07:02+0000" }, diff --git a/tests/pipeline/pplace.nf.test.snap b/tests/pipeline/pplace.nf.test.snap index abd94f98..4f64efa8 100644 --- a/tests/pipeline/pplace.nf.test.snap +++ b/tests/pipeline/pplace.nf.test.snap @@ -8,7 +8,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, 
ShortRead=1.58.0, dada2=1.28.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T17:24:03+0000" }, diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 87538971..7dca4e3e 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, 
dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, diff --git a/tests/pipeline/single.nf.test.snap b/tests/pipeline/single.nf.test.snap index 751d9a83..a31b986c 100644 --- a/tests/pipeline/single.nf.test.snap +++ b/tests/pipeline/single.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, 
Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T20:35:33+0000" }, diff --git a/tests/pipeline/sintax.nf.test.snap b/tests/pipeline/sintax.nf.test.snap index aae3466c..069c7fa8 100644 --- a/tests/pipeline/sintax.nf.test.snap +++ b/tests/pipeline/sintax.nf.test.snap @@ -16,7 +16,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T16:40:18+0000" }, diff --git a/tests/pipeline/test.nf.test.snap b/tests/pipeline/test.nf.test.snap index 8fc51fa0..967f1369 100644 --- a/tests/pipeline/test.nf.test.snap +++ b/tests/pipeline/test.nf.test.snap @@ -22,7 +22,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, 
dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CLUSTERS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, VSEARCH_CLUSTER={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.1}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CLUSTERS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, VSEARCH_CLUSTER={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T20:55:32+0000" }, From 77bb7ec1b7b80e7a43ab3fd6faee8b37e2cf41ff Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 13 Nov 2023 12:39:28 +0100 Subject: [PATCH 002/104] update changelog --- CHANGELOG.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf0e5ce0..620c9716 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## nf-core/ampliseq version 2.8.0dev + +### `Added` + +### `Changed` + +### `Fixed` + +### `Dependencies` + +### `Removed` + ## nf-core/ampliseq version 2.7.1 - 2023-11-14 ### `Added` From 2992fb7c6a103c756e2250e23fb13af08ce16a66 Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 13 Nov 2023 12:41:07 +0100 Subject: [PATCH 003/104] fix indentation --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 5139e0ec..8ca82720 100644 --- a/nextflow.config +++ b/nextflow.config @@ -329,7 +329,7 @@ manifest { description = """Amplicon sequencing analysis workflow using DADA2 and QIIME2""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.8.0dev' + version = '2.8.0dev' doi = '10.5281/zenodo.1493841' } From d86c5696b08fe2ab0dea8f04219e01c7ada55c55 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 12:04:12 -0500 Subject: [PATCH 004/104] Add params.qiime_ref_tax_custom in preparation of allowing custom qiime database. --- nextflow.config | 1 + nextflow_schema.json | 5 +++++ workflows/ampliseq.nf | 15 +++++++++++---- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/nextflow.config b/nextflow.config index 93e19e86..1e5a567f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -108,6 +108,7 @@ params { cut_dada_ref_taxonomy = false sintax_ref_taxonomy = null qiime_ref_taxonomy = null + qiime_ref_tax_custom = null kraken2_ref_taxonomy = null kraken2_assign_taxlevels = null kraken2_ref_tax_custom = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 1d3098da..69820e8c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -432,6 +432,11 @@ "greengenes85" ] }, + "qiime_ref_tax_custom": { + "type": "string", + "help_text": "Is preferred over `--qiime_ref_taxonomy`. 
Can be compressed tar archive (.tar.gz|.tgz) or folder containing the database.", + "description": "Path to a custom QIIME2 reference taxonomy database (*.tar.gz|*.tgz archive or folder)" + }, "classifier": { "type": "string", "description": "Path to QIIME2 trained classifier file (typically *-classifier.qza)", diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 05ddfee7..eee62b97 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -59,9 +59,16 @@ if (params.dada_ref_tax_custom) { val_dada_ref_taxonomy = "none" } -if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { +if (params.qiime_ref_tax_custom) { + ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) + val_qiime_ref_taxonomy = "user" +} else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) {. ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) } -} else { ch_qiime_ref_taxonomy = Channel.empty() } + val_qiime_ref_taxonomy = params.qiime_ref_taxonomy.replace('=','_').replace('.','_') +} else { + ch_qiime_ref_taxonomy = Channel.empty() + val_qiime_ref_taxonomy = "none" +} if (params.sintax_ref_taxonomy && !params.skip_taxonomy) { ch_sintax_ref_taxonomy = Channel.fromList(params.sintax_ref_databases[params.sintax_ref_taxonomy]["file"]).map { file(it) } @@ -131,7 +138,7 @@ if ( params.dada_ref_taxonomy && !params.skip_dada_addspecies && !params.skip_da } //only run QIIME2 when taxonomy is actually calculated and all required data is available -if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) { +if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && 
!params.skip_taxonomy && !params.skip_qiime && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) { run_qiime2 = true } else { run_qiime2 = false @@ -552,7 +559,7 @@ workflow AMPLISEQ { //QIIME2 if ( run_qiime2 ) { - if (params.qiime_ref_taxonomy && !params.classifier) { + if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( ch_qiime_ref_taxonomy.collect(), params.FW_primer, From 439097c93751277109c643359abb11f7ab158a14 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 12:05:14 -0500 Subject: [PATCH 005/104] Implementation of logic to handle a custom qiime2 reference database stored in either a directory or a tarball. --- subworkflows/local/qiime2_preptax.nf | 29 ++++++++++++++++++++++++++-- workflows/ampliseq.nf | 1 + 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 7f3cb80b..429aac71 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -2,6 +2,7 @@ * Training of a classifier with QIIME2 */ +include { UNTAR } from '../../modules/nf-core/untar/main' include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime' include { QIIME2_EXTRACT } from '../../modules/local/qiime2_extract' include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' @@ -9,13 +10,37 @@ include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' workflow QIIME2_PREPTAX { take: ch_qiime_ref_taxonomy //channel, list of files + val_qiime_ref_taxonomy //val FW_primer //val RV_primer //val main: - FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) + if (params.qiime_ref_tax_custom) { + ch_qiime_ref_taxonomy + .branch { + tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) + dir: it.isDirectory() 
+ failed: true + }.set { ch_qiime_ref_taxonomy } + ch_qiime_ref_taxonomy.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." } + + UNTAR ( + ch_qiime_ref_taxonomy.tar + .map { + db -> + def meta = [:] + meta.id = val_qiime_ref_taxonomy + [ meta, db ] } ) + ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } + ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_taxonomy.dir) + + ch_ref_database = ch_qiime_db_dir.map{ Channel.fromPath(it + "/*.tax").combine(Channel.fromPath(it + "/*.fna")) } + } else { + FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) + + ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) + } - ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) ch_ref_database .map { db -> diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index eee62b97..03d1f6c4 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -562,6 +562,7 @@ workflow AMPLISEQ { if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( ch_qiime_ref_taxonomy.collect(), + val_qiime_ref_taxonomy, params.FW_primer, params.RV_primer ) From d924110622ce991ef6075520118717b87c816a34 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 12:28:57 -0500 Subject: [PATCH 006/104] Add greengenes2 2022.10 to qiime ref databases. --- bin/taxref_reformat_qiime_greengenes2022.sh | 8 ++++++++ conf/ref_databases.config | 6 ++++++ 2 files changed, 14 insertions(+) create mode 100755 bin/taxref_reformat_qiime_greengenes2022.sh diff --git a/bin/taxref_reformat_qiime_greengenes2022.sh b/bin/taxref_reformat_qiime_greengenes2022.sh new file mode 100755 index 00000000..1d84e702 --- /dev/null +++ b/bin/taxref_reformat_qiime_greengenes2022.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +# Decompress files. 
+gzip -d *.gz + +# Select and rename files +mv *.fna greengenes2022.fna +mv *.tsv greengenes2022.tax diff --git a/conf/ref_databases.config b/conf/ref_databases.config index c80820ec..c7d9f251 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -306,6 +306,12 @@ params { citation = "McDonald, D., Price, M., Goodrich, J. et al. An improved Greengenes taxonomy with explicit ranks for ecological and evolutionary analyses of bacteria and archaea. ISME J 6, 610–618 (2012). https://doi.org/10.1038/ismej.2011.139" fmtscript = "taxref_reformat_qiime_greengenes85.sh" } + 'greengenes2022' { + title = "Greengenes2 16S - Version 2022.10" + file = [ "http://ftp.microbio.me/greengenes_release/2022.10/2022.10.seqs.fna.gz", "http://ftp.microbio.me/greengenes_release/2022.10/2022.10.taxonomy.md5.tsv.gz" ] + citation = "McDonald, D., Jiang, Y., Balaban, M. et al. Greengenes2 unifies microbial data in a single reference tree. Nat Biotechnol (2023). https://doi.org/10.1038/s41587-023-01845-1" + fmtscript = "taxref_reformat_qiime_greengenes2022.sh" + } } //Sintax taxonomic reference databases sintax_ref_databases { From 10828417bcbc7ff3fb62a7bef96bbf49eb3c630a Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 12:41:45 -0500 Subject: [PATCH 007/104] Add greengenes2022 to acceptable qiime_ref_taxonomy values. --- nextflow_schema.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 1d3098da..08973133 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -429,7 +429,8 @@ "unite-alleuk=8.3", "unite-alleuk=8.2", "unite-alleuk", - "greengenes85" + "greengenes85", + "greengenes2022" ] }, "classifier": { From 4612cd11bb16d4f27132bc9b1370dc1c8419e58a Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 12:50:31 -0500 Subject: [PATCH 008/104] gzip does not operate on symbolic links unless forced or, as now, told to emit result to stdout. 
--- bin/taxref_reformat_qiime_greengenes2022.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/taxref_reformat_qiime_greengenes2022.sh b/bin/taxref_reformat_qiime_greengenes2022.sh index 1d84e702..aa4678a8 100755 --- a/bin/taxref_reformat_qiime_greengenes2022.sh +++ b/bin/taxref_reformat_qiime_greengenes2022.sh @@ -1,7 +1,8 @@ #!/bin/sh # Decompress files. -gzip -d *.gz +gzip -c -d 2022.10.seqs.fna.gz > 2022.10.seqs.fna +gzip -c -d 2022.10.taxonomy.md5.tsv.gz > 2022.10.taxonomy.md5.tsv # Select and rename files mv *.fna greengenes2022.fna From 14c89b9c22faec6544a1ffa97e28afe989e6336f Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 14:53:05 -0500 Subject: [PATCH 009/104] Some params checking logic. --- lib/WorkflowAmpliseq.groovy | 6 +++--- lib/WorkflowMain.groovy | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy index 5e103911..0868866a 100755 --- a/lib/WorkflowAmpliseq.groovy +++ b/lib/WorkflowAmpliseq.groovy @@ -77,12 +77,12 @@ class WorkflowAmpliseq { } if (params.skip_dada_taxonomy && params.sbdiexport) { - if (!params.sintax_ref_taxonomy && (params.skip_qiime || !params.qiime_ref_taxonomy)) { + if (!params.sintax_ref_taxonomy && (params.skip_qiime || (!params.qiime_ref_taxonomy && !params.qiime_ref_tax_custom))) { Nextflow.error("Incompatible parameters: `--sbdiexport` expects taxa annotation and therefore annotation with either DADA2, SINTAX, or QIIME2 is needed.") } } - if ( (!params.FW_primer || !params.RV_primer) && params.qiime_ref_taxonomy && !params.skip_qiime && !params.skip_taxonomy ) { + if ( (!params.FW_primer || !params.RV_primer) && (params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.skip_qiime && !params.skip_taxonomy ) { Nextflow.error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for cutting the QIIME2 reference database to the amplicon sequences. 
Please specify primers or do not use `--qiime_ref_taxonomy`.") } @@ -90,7 +90,7 @@ class WorkflowAmpliseq { Nextflow.error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for cutting the DADA2 reference database to the amplicon sequences. Please specify primers or do not use `--cut_dada_ref_taxonomy`.") } - if (params.qiime_ref_taxonomy && params.classifier) { + if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && params.classifier) { Nextflow.error("Incompatible parameters: `--qiime_ref_taxonomy` will produce a classifier but `--classifier` points to a precomputed classifier, therefore, only use one of those.") } diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 7f49735e..4b7ec2af 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -34,7 +34,7 @@ class WorkflowMain { if (params.sintax_ref_taxonomy && !params.skip_taxonomy) { sintaxreftaxonomyExistsError(params, log) } - if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { + if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.skip_taxonomy && !params.classifier) { qiimereftaxonomyExistsError(params, log) } From d214ec0252321261d56b315d0b34797432011442 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 14:55:09 -0500 Subject: [PATCH 010/104] Loose . lying around. --- workflows/ampliseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 03d1f6c4..3d4ad07d 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -62,7 +62,7 @@ if (params.dada_ref_tax_custom) { if (params.qiime_ref_tax_custom) { ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) val_qiime_ref_taxonomy = "user" -} else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) {. 
+} else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) } val_qiime_ref_taxonomy = params.qiime_ref_taxonomy.replace('=','_').replace('.','_') } else { From 9346d7aea37c365e31ad37ae8ffa08a51a2ab8a4 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 14:57:54 -0500 Subject: [PATCH 011/104] Only perform collect if going to FORMAT_TAXONOMY_QIIME. --- subworkflows/local/qiime2_preptax.nf | 2 +- workflows/ampliseq.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 429aac71..0d66308a 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -36,7 +36,7 @@ workflow QIIME2_PREPTAX { ch_ref_database = ch_qiime_db_dir.map{ Channel.fromPath(it + "/*.tax").combine(Channel.fromPath(it + "/*.fna")) } } else { - FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) + FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) } diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 3d4ad07d..b45fd9a5 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -561,7 +561,7 @@ workflow AMPLISEQ { if ( run_qiime2 ) { if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( - ch_qiime_ref_taxonomy.collect(), + ch_qiime_ref_taxonomy, val_qiime_ref_taxonomy, params.FW_primer, params.RV_primer From ef053b1369e5f75aad794772e387257b06e99d35 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 15:13:23 -0500 Subject: [PATCH 012/104] =?UTF-8?q?Set=20into=20new=20channel=20when=20bra?= =?UTF-8?q?nching=20on=20ch=5Fqiime=5Fref=5Ftaxonomy.=C2=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- subworkflows/local/qiime2_preptax.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 0d66308a..38a9faf8 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -21,18 +21,18 @@ workflow QIIME2_PREPTAX { tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) dir: it.isDirectory() failed: true - }.set { ch_qiime_ref_taxonomy } - ch_qiime_ref_taxonomy.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." } + }.set { ch_qiime_ref_tax_branched } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." } UNTAR ( - ch_qiime_ref_taxonomy.tar + ch_qiime_ref_tax_branched.tar .map { db -> def meta = [:] meta.id = val_qiime_ref_taxonomy [ meta, db ] } ) ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } - ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_taxonomy.dir) + ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) ch_ref_database = ch_qiime_db_dir.map{ Channel.fromPath(it + "/*.tax").combine(Channel.fromPath(it + "/*.fna")) } } else { From a48a09fe3f7398ce15fc8b0b1bc449b92c880ae8 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 15:48:14 -0500 Subject: [PATCH 013/104] Try to unpack the database dir into component files using a module. 
--- modules/local/qiime2_unpack.nf | 31 ++++++++++++++++++++++++++++ subworkflows/local/qiime2_preptax.nf | 5 ++++- 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 modules/local/qiime2_unpack.nf diff --git a/modules/local/qiime2_unpack.nf b/modules/local/qiime2_unpack.nf new file mode 100644 index 00000000..e77286da --- /dev/null +++ b/modules/local/qiime2_unpack.nf @@ -0,0 +1,31 @@ +process QIIME2_UNPACK { + label 'process_low' + + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : + 'docker.io/biocontainers/biocontainers:v1.2.0_cv1' }" + + input: + path(database) + + output: + path("*.fna"), emit: fasta + path("*.tax"), emit: tax + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + // TODO: need to not have this be a copy. + script: + """ + cp $database/*.fna . + cp $database/*.tax . 
+ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bash: \$(bash --version | sed -n 1p | sed 's/GNU bash, version //g') + END_VERSIONS + """ +} diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 38a9faf8..3ad8365f 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -6,6 +6,7 @@ include { UNTAR } from '../../modules/nf-core/untar/main' include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime' include { QIIME2_EXTRACT } from '../../modules/local/qiime2_extract' include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' +include { QIIME2_UNPACK } from '../../modules/local/qiime2_unpack' workflow QIIME2_PREPTAX { take: @@ -34,7 +35,9 @@ workflow QIIME2_PREPTAX { ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) - ch_ref_database = ch_qiime_db_dir.map{ Channel.fromPath(it + "/*.tax").combine(Channel.fromPath(it + "/*.fna")) } + QIIME2_UNPACK(ch_qiime_db_dir) + + ch_ref_database = ch_qiime_db_dir.map{ QIIME2_UNPACK.out.fasta.combine(QIIME2_UNPACK.out.tax) } } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) From a9971b68ede12d0431d248362d1c279b7ac0cc07 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 15:59:03 -0500 Subject: [PATCH 014/104] Remove map wrapping the combine. 
--- subworkflows/local/qiime2_preptax.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 3ad8365f..8229190a 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -37,7 +37,7 @@ workflow QIIME2_PREPTAX { QIIME2_UNPACK(ch_qiime_db_dir) - ch_ref_database = ch_qiime_db_dir.map{ QIIME2_UNPACK.out.fasta.combine(QIIME2_UNPACK.out.tax) } + ch_ref_database = QIIME2_UNPACK.out.fasta.combine(QIIME2_UNPACK.out.tax) } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) From aac51bd4b162e17ed0f9ca5864527651abf4e23d Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 16:14:05 -0500 Subject: [PATCH 015/104] Remove unpack in favour of map and filter. --- modules/local/qiime2_unpack.nf | 31 ---------------------------- subworkflows/local/qiime2_preptax.nf | 16 +++++++++++--- 2 files changed, 13 insertions(+), 34 deletions(-) delete mode 100644 modules/local/qiime2_unpack.nf diff --git a/modules/local/qiime2_unpack.nf b/modules/local/qiime2_unpack.nf deleted file mode 100644 index e77286da..00000000 --- a/modules/local/qiime2_unpack.nf +++ /dev/null @@ -1,31 +0,0 @@ -process QIIME2_UNPACK { - label 'process_low' - - conda "conda-forge::sed=4.7" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'docker.io/biocontainers/biocontainers:v1.2.0_cv1' }" - - input: - path(database) - - output: - path("*.fna"), emit: fasta - path("*.tax"), emit: tax - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - // TODO: need to not have this be a copy. - script: - """ - cp $database/*.fna . - cp $database/*.tax . 
- - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bash: \$(bash --version | sed -n 1p | sed 's/GNU bash, version //g') - END_VERSIONS - """ -} diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 8229190a..2969d370 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -6,7 +6,6 @@ include { UNTAR } from '../../modules/nf-core/untar/main' include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime' include { QIIME2_EXTRACT } from '../../modules/local/qiime2_extract' include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' -include { QIIME2_UNPACK } from '../../modules/local/qiime2_unpack' workflow QIIME2_PREPTAX { take: @@ -35,9 +34,20 @@ workflow QIIME2_PREPTAX { ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) - QIIME2_UNPACK(ch_qiime_db_dir) + ch_ref_database_fna = ch_qiime_db_dir.map{ dir -> + files = file(dir.resolve("*.fna"), checkIfExists: true) + } | filter { + if (it instanceof List) log.warn "Found multiple fasta files for QIIME2 reference database." + ! it instanceof List + } + ch_ref_database_tax = ch_qiime_db_dir.map{ dir -> + files = file(dir.resolve("*.tax"), checkIfExists: true) + } | filter { + if (it instanceof List) log.warn "Found multiple fasta files for QIIME2 reference database." + ! it instanceof List + } - ch_ref_database = QIIME2_UNPACK.out.fasta.combine(QIIME2_UNPACK.out.tax) + ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) From 1b2825ec952b9d2d4d8edb8cbd348fd843d42223 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 16:17:38 -0500 Subject: [PATCH 016/104] Glob results in list in all circumstances, check length instead. 
--- subworkflows/local/qiime2_preptax.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 2969d370..f040f207 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -37,14 +37,14 @@ workflow QIIME2_PREPTAX { ch_ref_database_fna = ch_qiime_db_dir.map{ dir -> files = file(dir.resolve("*.fna"), checkIfExists: true) } | filter { - if (it instanceof List) log.warn "Found multiple fasta files for QIIME2 reference database." - ! it instanceof List + if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database." + it.size() == 1 } ch_ref_database_tax = ch_qiime_db_dir.map{ dir -> files = file(dir.resolve("*.tax"), checkIfExists: true) } | filter { - if (it instanceof List) log.warn "Found multiple fasta files for QIIME2 reference database." - ! it instanceof List + if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database." + it.size() == 1 } ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) From c230c2261a7a55e6070c649b905d9cbd2a7d8982 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 16:42:02 -0500 Subject: [PATCH 017/104] Update usage.md. 
--- docs/usage.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/usage.md b/docs/usage.md index 38c2cc23..721523fc 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -225,6 +225,7 @@ Pre-configured reference taxonomy databases are: | sbdi-gtdb | + | - | - | - | 16S rRNA | | rdp | + | - | + | - | 16S rRNA | | greengenes | - | - | + | (+)¹ | 16S rRNA | +| greengenes2 | - | - | - | + | 16S rRNA | | pr2 | + | - | - | - | 18S rRNA | | unite-fungi | + | + | - | + | eukaryotic nuclear ribosomal ITS region | | unite-alleuk | + | + | - | + | eukaryotic nuclear ribosomal ITS region | From 1dd0d8ed5376cd76b6e440ffa3bec9a1da7bc9c6 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 16:49:45 -0500 Subject: [PATCH 018/104] Update CHANGELOG.md. --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 620c9716..c44c74f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- []() - Added Greengenes2 database, version 2022.10, support for QIIME2 taxonomic classification. + ### `Changed` ### `Fixed` From 850a603a445dcec32739439b9049ac38f5446e2f Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 16:53:03 -0500 Subject: [PATCH 019/104] Add pull request link to changelog. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c44c74f3..10456d80 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- []() - Added Greengenes2 database, version 2022.10, support for QIIME2 taxonomic classification. +- [#666](https://github.com/nf-core/ampliseq/pull/666) - Added Greengenes2 database, version 2022.10, support for QIIME2 taxonomic classification. 
### `Changed` From a4219a0baa0e4b256ee97c5c1a54a566fb801f07 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 28 Nov 2023 17:11:59 -0500 Subject: [PATCH 020/104] Update CHANGELOG.md. --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 620c9716..b884a408 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +[]() - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification. + ### `Changed` ### `Fixed` From 0ccf6e6daf7b46be84a81fd9498d2e55fc5db795 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 10:12:47 -0500 Subject: [PATCH 021/104] Update error message when passing both one of --qiime_ref_taxonomy or --qiime_ref_tax_custom and --classifier. --- lib/WorkflowAmpliseq.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy index 0868866a..25db3ed6 100755 --- a/lib/WorkflowAmpliseq.groovy +++ b/lib/WorkflowAmpliseq.groovy @@ -91,7 +91,7 @@ class WorkflowAmpliseq { } if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && params.classifier) { - Nextflow.error("Incompatible parameters: `--qiime_ref_taxonomy` will produce a classifier but `--classifier` points to a precomputed classifier, therefore, only use one of those.") + Nextflow.error("Incompatible parameters: `--qiime_ref_taxonomy` and `--qiime_ref_tax_custom` will produce a classifier but `--classifier` points to a precomputed classifier, therefore, only use one of those.") } if (params.kraken2_ref_tax_custom && !params.kraken2_assign_taxlevels ) { From 590f415952b249d83ae5fcd7d128b109310d0983 Mon Sep 17 00:00:00 2001 From: Matthew Date: Wed, 29 Nov 2023 11:13:35 -0500 Subject: [PATCH 022/104] Update CHANGELOG.md with pull request number. 
Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b884a408..6c2030cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -[]() - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification. +[#667](https://github.com/nf-core/ampliseq/pull/667) - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification. ### `Changed` From f5d80f572e008693d3f83dd5a8f169784da546c7 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 11:58:33 -0500 Subject: [PATCH 023/104] Add support for specifying two (possibly gzipped) files as --qiime_ref_tax_custom. --- modules/local/gzip_decompress.nf | 32 ++++++++++++ nextflow_schema.json | 2 +- subworkflows/local/qiime2_preptax.nf | 75 ++++++++++++++++++---------- workflows/ampliseq.nf | 6 ++- 4 files changed, 86 insertions(+), 29 deletions(-) create mode 100644 modules/local/gzip_decompress.nf diff --git a/modules/local/gzip_decompress.nf b/modules/local/gzip_decompress.nf new file mode 100644 index 00000000..fa8fa82c --- /dev/null +++ b/modules/local/gzip_decompress.nf @@ -0,0 +1,32 @@ +process GZIP_DECOMPRESS { + tag "$file" + label 'process_single' + + conda "conda-forge::sed=4.7 conda-forge::gzip=1.13" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + path(file) + + output: + path("$outfile"), emit: ungzip + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + outfile = task.ext.outfile ?: ( meta.id ? 
"${meta.id}" : archive.baseName.toString().replaceFirst(/\.gz$/, "")) + + """ + gzip $args -c -d $file > $outfile + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gzip: \$(echo \$(gzip --version 2>&1) | sed 's/gzip //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/nextflow_schema.json b/nextflow_schema.json index 69820e8c..c6c6f8b0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -434,7 +434,7 @@ }, "qiime_ref_tax_custom": { "type": "string", - "help_text": "Is preferred over `--qiime_ref_taxonomy`. Can be compressed tar archive (.tar.gz|.tgz) or folder containing the database.", + "help_text": "Is preferred over `--qiime_ref_taxonomy`. Can be compressed tar archive (.tar.gz|.tgz), a pair of (possibly gzipped) filepaths, or folder containing the database.", "description": "Path to a custom QIIME2 reference taxonomy database (*.tar.gz|*.tgz archive or folder)" }, "classifier": { diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index f040f207..7cc4817d 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -3,6 +3,7 @@ */ include { UNTAR } from '../../modules/nf-core/untar/main' +include { GZIP_DECOMPRESS } from '../../modules/local/gzip_decompress.nf' include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime' include { QIIME2_EXTRACT } from '../../modules/local/qiime2_extract' include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' @@ -15,36 +16,56 @@ workflow QIIME2_PREPTAX { RV_primer //val main: + ch_qiime2_preptax_versions = Channel.empty() + if (params.qiime_ref_tax_custom) { - ch_qiime_ref_taxonomy - .branch { - tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) - dir: it.isDirectory() - failed: true - }.set { ch_qiime_ref_tax_branched } - ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. 
Please review input." } + if (ch_qiime_ref_taxonomy.size() == 2) { + ch_qiime_ref_taxonomy + .branch { + gzip: it.isFile() && ( it.getName().endsWith(".gz") ) + decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith (".tax") ) + failed: true + }.set { ch_qiime_ref_tax_branched } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." } - UNTAR ( - ch_qiime_ref_tax_branched.tar - .map { - db -> - def meta = [:] - meta.id = val_qiime_ref_taxonomy - [ meta, db ] } ) - ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } - ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) + GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) - ch_ref_database_fna = ch_qiime_db_dir.map{ dir -> - files = file(dir.resolve("*.fna"), checkIfExists: true) - } | filter { - if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database." - it.size() == 1 - } - ch_ref_database_tax = ch_qiime_db_dir.map{ dir -> - files = file(dir.resolve("*.tax"), checkIfExists: true) - } | filter { - if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database." - it.size() == 1 + ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip + ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) + + ch_ref_database = ch_qiime_db_files.collate(2) + } else { + ch_qiime_ref_taxonomy + .branch { + tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) + dir: it.isDirectory() + failed: true + }.set { ch_qiime_ref_tax_branched } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." 
} + + UNTAR ( + ch_qiime_ref_tax_branched.tar + .map { + db -> + def meta = [:] + meta.id = val_qiime_ref_taxonomy + [ meta, db ] } ) + ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } + ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) + + ch_ref_database_fna = ch_qiime_db_dir.map{ dir -> + files = file(dir.resolve("*.fna"), checkIfExists: true) + } | filter { + if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database." + it.size() == 1 + } + ch_ref_database_tax = ch_qiime_db_dir.map{ dir -> + files = file(dir.resolve("*.tax"), checkIfExists: true) + } | filter { + if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database." + it.size() == 1 + } } ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index b45fd9a5..9bd1cf5c 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -60,7 +60,11 @@ if (params.dada_ref_tax_custom) { } if (params.qiime_ref_tax_custom) { - ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) + if ("${params.qiime_ref_tax_custom}".contains(",")) { + ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}".split(","), checkIfExists: true) + } else { + ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) + } val_qiime_ref_taxonomy = "user" } else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) } From 7016682fd8375525b2e23e3fdebaa01cbdd8f082 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 14:07:43 -0500 Subject: [PATCH 024/104] Only support providing two files separated by a comma. 
--- nextflow_schema.json | 4 +- subworkflows/local/qiime2_preptax.nf | 59 ++++++---------------------- workflows/ampliseq.nf | 10 ++--- 3 files changed, 20 insertions(+), 53 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index c6c6f8b0..79a4cebb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -434,8 +434,8 @@ }, "qiime_ref_tax_custom": { "type": "string", - "help_text": "Is preferred over `--qiime_ref_taxonomy`. Can be compressed tar archive (.tar.gz|.tgz), a pair of (possibly gzipped) filepaths, or folder containing the database.", - "description": "Path to a custom QIIME2 reference taxonomy database (*.tar.gz|*.tgz archive or folder)" + "help_text": "Is preferred over `--qiime_ref_taxonomy`. A comma separated pair of (possibly gzipped) filepaths.", + "description": "Path to files of a custom QIIME2 reference taxonomy database (files may be gzipped)" }, "classifier": { "type": "string", diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 7cc4817d..a4e1d776 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -19,56 +19,23 @@ workflow QIIME2_PREPTAX { ch_qiime2_preptax_versions = Channel.empty() if (params.qiime_ref_tax_custom) { - if (ch_qiime_ref_taxonomy.size() == 2) { - ch_qiime_ref_taxonomy - .branch { - gzip: it.isFile() && ( it.getName().endsWith(".gz") ) - decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith (".tax") ) - failed: true - }.set { ch_qiime_ref_tax_branched } - ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." 
} + ch_qiime_ref_taxonomy.view() - GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip) - ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) + // ch_qiime_ref_taxonomy + // .branch { + // gzip: it.isFile() && ( it.getName().endsWith(".gz") ) + // decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith (".tax") ) + // failed: true + // }.set { ch_qiime_ref_tax_branched } + // ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." } - ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip - ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) + // GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip) + // ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) - ch_ref_database = ch_qiime_db_files.collate(2) - } else { - ch_qiime_ref_taxonomy - .branch { - tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) - dir: it.isDirectory() - failed: true - }.set { ch_qiime_ref_tax_branched } - ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a directory nor a file that ends in '.tar.gz' or '.tgz'. Please review input." } + // ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip + // ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) - UNTAR ( - ch_qiime_ref_tax_branched.tar - .map { - db -> - def meta = [:] - meta.id = val_qiime_ref_taxonomy - [ meta, db ] } ) - ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } - ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) - - ch_ref_database_fna = ch_qiime_db_dir.map{ dir -> - files = file(dir.resolve("*.fna"), checkIfExists: true) - } | filter { - if (it.size() > 1) log.warn "Found multiple fasta files for QIIME2 reference database." 
- it.size() == 1 - } - ch_ref_database_tax = ch_qiime_db_dir.map{ dir -> - files = file(dir.resolve("*.tax"), checkIfExists: true) - } | filter { - if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database." - it.size() == 1 - } - } - - ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) + // ch_ref_database = ch_qiime_db_files.collate(2) } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 9bd1cf5c..07df8960 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -60,11 +60,11 @@ if (params.dada_ref_tax_custom) { } if (params.qiime_ref_tax_custom) { - if ("${params.qiime_ref_tax_custom}".contains(",")) { - ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}".split(","), checkIfExists: true) - } else { - ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) + if (!"${params.qiime_ref_tax_custom}".contains(",")) { + error "--qiime_ref_tax_custom takes two filepaths separated by a comma. Please review input." } + + ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}".split(","), checkIfExists: true) val_qiime_ref_taxonomy = "user" } else if (params.qiime_ref_taxonomy && !params.skip_taxonomy && !params.classifier) { ch_qiime_ref_taxonomy = Channel.fromList(params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]).map { file(it) } @@ -565,7 +565,7 @@ workflow AMPLISEQ { if ( run_qiime2 ) { if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( - ch_qiime_ref_taxonomy, + ch_qiime_ref_taxonomy.collect(), val_qiime_ref_taxonomy, params.FW_primer, params.RV_primer From 79cbfe8fb0bf50035529deb5fe24d18693784b75 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 14:41:49 -0500 Subject: [PATCH 025/104] Fix split returns a String[] and we actually need an ArrayList. 
--- subworkflows/local/qiime2_preptax.nf | 2 +- workflows/ampliseq.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 7cc4817d..f36dda42 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -19,7 +19,7 @@ workflow QIIME2_PREPTAX { ch_qiime2_preptax_versions = Channel.empty() if (params.qiime_ref_tax_custom) { - if (ch_qiime_ref_taxonomy.size() == 2) { + if ("${params.qiime_ref_tax_custom}".contains(",")) { ch_qiime_ref_taxonomy .branch { gzip: it.isFile() && ( it.getName().endsWith(".gz") ) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 9bd1cf5c..3a332326 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -61,7 +61,7 @@ if (params.dada_ref_tax_custom) { if (params.qiime_ref_tax_custom) { if ("${params.qiime_ref_tax_custom}".contains(",")) { - ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}".split(","), checkIfExists: true) + ch_qiime_ref_taxonomy = Channel.fromPath(Arrays.asList("${params.qiime_ref_tax_custom}".split(",")), checkIfExists: true) } else { ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) } @@ -565,7 +565,7 @@ workflow AMPLISEQ { if ( run_qiime2 ) { if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( - ch_qiime_ref_taxonomy, + ch_qiime_ref_taxonomy.collect(), val_qiime_ref_taxonomy, params.FW_primer, params.RV_primer From 6d767bc1ea80aa464d80b4aae48944759a44f3a6 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 14:46:30 -0500 Subject: [PATCH 026/104] Move ch_ref_database set into correct scope. 
--- subworkflows/local/qiime2_preptax.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index f36dda42..19a9bc4b 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -66,9 +66,9 @@ workflow QIIME2_PREPTAX { if (it.size() > 1) log.warn "Found multiple tax files for QIIME2 reference database." it.size() == 1 } - } - ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) + ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) + } } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) From f76b49bbcd56315189603e0e292dba2e108f68e5 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 14:53:35 -0500 Subject: [PATCH 027/104] Try using map to work through list of files. --- subworkflows/local/qiime2_preptax.nf | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 19a9bc4b..96a0db96 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -21,18 +21,19 @@ workflow QIIME2_PREPTAX { if (params.qiime_ref_tax_custom) { if ("${params.qiime_ref_tax_custom}".contains(",")) { ch_qiime_ref_taxonomy - .branch { - gzip: it.isFile() && ( it.getName().endsWith(".gz") ) - decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith (".tax") ) - failed: true - }.set { ch_qiime_ref_tax_branched } - ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." 
} - - GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip) - ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) + .map { filepath -> + candidate = file(filepath, checkIfExists: true) + if (filepath.endsWith(".gz")) { + GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) - ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip - ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) + return GZIP_DECOMPRESS.out.ungzip + } else if (filepath.endsWith(".fna") || filepath.endsWith(".tax")) { + return candidate + } else { + error "$filepath is neither a compressed or decompressed sequence or taxonomy file. Please review input." + } + }.set { ch_qiime_db_files } ch_ref_database = ch_qiime_db_files.collate(2) } else { From 0890a0e64beb641422698f61c2adfee3f0db46a7 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 15:17:56 -0500 Subject: [PATCH 028/104] Can't call processes from inside maps. 
--- subworkflows/local/qiime2_preptax.nf | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 96a0db96..78a4ab27 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -20,24 +20,23 @@ workflow QIIME2_PREPTAX { if (params.qiime_ref_tax_custom) { if ("${params.qiime_ref_tax_custom}".contains(",")) { - ch_qiime_ref_taxonomy - .map { filepath -> - candidate = file(filepath, checkIfExists: true) - if (filepath.endsWith(".gz")) { - GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.gzip) - ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) + ch_qiime_ref_taxonomy.flatten() + .branch { + compressed: it.isFile() && it.getName().endsWith(".gz") + decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith(".tax") ) + failed: true + }.set { ch_qiime_ref_tax_branched } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." } + + GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.compressed) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) - return GZIP_DECOMPRESS.out.ungzip - } else if (filepath.endsWith(".fna") || filepath.endsWith(".tax")) { - return candidate - } else { - error "$filepath is neither a compressed or decompressed sequence or taxonomy file. Please review input." 
- } - }.set { ch_qiime_db_files } + ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip + ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) ch_ref_database = ch_qiime_db_files.collate(2) } else { - ch_qiime_ref_taxonomy + ch_qiime_ref_taxonomy.flatten() .branch { tar: it.isFile() && ( it.getName().endsWith(".tar.gz") || it.getName().endsWith (".tgz") ) dir: it.isDirectory() From 7276a8d1e36eb176fa4171c46d34aed24e4c8fad Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 15:20:25 -0500 Subject: [PATCH 029/104] Fix outfile definition in GZIP_DECOMPRESS. --- modules/local/gzip_decompress.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/gzip_decompress.nf b/modules/local/gzip_decompress.nf index fa8fa82c..effd256c 100644 --- a/modules/local/gzip_decompress.nf +++ b/modules/local/gzip_decompress.nf @@ -19,7 +19,7 @@ process GZIP_DECOMPRESS { script: def args = task.ext.args ?: '' - outfile = task.ext.outfile ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.gz$/, "")) + outfile = task.ext.outfile ?: archive.baseName.toString().replaceFirst(/\.gz$/, "") """ gzip $args -c -d $file > $outfile From f0a8715a9cc33c520e2047f0f45c35093c4a28c5 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 15:21:45 -0500 Subject: [PATCH 030/104] Fix outfile definition in GZIP_DECOMPRESS. 
--- modules/local/gzip_decompress.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/gzip_decompress.nf b/modules/local/gzip_decompress.nf index effd256c..c6ea37a5 100644 --- a/modules/local/gzip_decompress.nf +++ b/modules/local/gzip_decompress.nf @@ -19,7 +19,7 @@ process GZIP_DECOMPRESS { script: def args = task.ext.args ?: '' - outfile = task.ext.outfile ?: archive.baseName.toString().replaceFirst(/\.gz$/, "") + outfile = task.ext.outfile ?: file.baseName.toString().replaceFirst(/\.gz$/, "") """ gzip $args -c -d $file > $outfile From 56f241b05ecbbf1f068c3ba7ee9c11b749c92e16 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 15:48:34 -0500 Subject: [PATCH 031/104] Add some comments. --- subworkflows/local/qiime2_preptax.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 78a4ab27..561a75dc 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -19,6 +19,7 @@ workflow QIIME2_PREPTAX { ch_qiime2_preptax_versions = Channel.empty() if (params.qiime_ref_tax_custom) { + // Handle case where we have been provided a pair of filepaths. if ("${params.qiime_ref_tax_custom}".contains(",")) { ch_qiime_ref_taxonomy.flatten() .branch { @@ -35,6 +36,7 @@ workflow QIIME2_PREPTAX { ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) ch_ref_database = ch_qiime_db_files.collate(2) + // Handle case we have been provided a single filepath (tarball or directory). } else { ch_qiime_ref_taxonomy.flatten() .branch { From 7907df5bfb4843a80352e8de18265c7331c4ba8a Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 15:49:02 -0500 Subject: [PATCH 032/104] Add an early check that two paths are provided when providing a comma-separated list. 
--- workflows/ampliseq.nf | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 3a332326..a99e71f9 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -61,7 +61,12 @@ if (params.dada_ref_tax_custom) { if (params.qiime_ref_tax_custom) { if ("${params.qiime_ref_tax_custom}".contains(",")) { - ch_qiime_ref_taxonomy = Channel.fromPath(Arrays.asList("${params.qiime_ref_tax_custom}".split(",")), checkIfExists: true) + qiime_ref_paths = "${params.qiime_ref_tax_custom}".split(",") + if (qiime_ref_paths.length != 2) { + error "--qiime_ref_tax_custom accepts a single filepath to a directory or tarball, or two files paths separated by a comma. Please review input." + } + + ch_qiime_ref_taxonomy = Channel.fromPath(Arrays.asList(qiime_ref_paths), checkIfExists: true) } else { ch_qiime_ref_taxonomy = Channel.fromPath("${params.qiime_ref_tax_custom}", checkIfExists: true) } From 1d6ce32e4911f86ebaf02af447db6fdcb3cadb47 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Wed, 29 Nov 2023 15:53:08 -0500 Subject: [PATCH 033/104] Make sure downstream is aware of new means of pointing to a qiime ref db. 
--- workflows/ampliseq.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index a99e71f9..9195038d 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -624,7 +624,7 @@ workflow AMPLISEQ { log.info "Use Kraken2 taxonomy classification" val_used_taxonomy = "Kraken2" ch_tax = QIIME2_INTAX ( ch_kraken2_tax, "" ).qza - } else if ( params.qiime_ref_taxonomy || params.classifier ) { + } else if ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) { log.info "Use QIIME2 taxonomy classification" val_used_taxonomy = "QIIME2" ch_tax = QIIME2_TAXONOMY.out.qza @@ -724,7 +724,7 @@ workflow AMPLISEQ { // MODULE: Predict functional potential of a bacterial community from marker genes with Picrust2 // if ( params.picrust ) { - if ( run_qiime2 && !params.skip_abundance_tables && ( params.dada_ref_taxonomy || params.qiime_ref_taxonomy || params.classifier || params.sintax_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) && !params.skip_taxonomy ) { + if ( run_qiime2 && !params.skip_abundance_tables && ( params.dada_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier || params.sintax_ref_taxonomy || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) && !params.skip_taxonomy ) { PICRUST ( QIIME2_EXPORT.out.abs_fasta, QIIME2_EXPORT.out.abs_tsv, "QIIME2", "This Picrust2 analysis is based on filtered reads from QIIME2" ) } else { PICRUST ( ch_fasta, ch_dada2_asv, "DADA2", "This Picrust2 analysis is based on unfiltered reads from DADA2" ) @@ -856,7 +856,7 @@ workflow AMPLISEQ { !params.skip_taxonomy && ( params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) ? KRAKEN2_TAXONOMY_WF.out.tax_tsv.ifEmpty( [] ) : [], !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax.ifEmpty( [] ) : [], !params.skip_taxonomy && params.pplace_tree ? 
FASTA_NEWICK_EPANG_GAPPA.out.heattree.ifEmpty( [[],[]] ) : [[],[]], - !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [], + !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [], run_qiime2, run_qiime2 ? val_used_taxonomy : "", run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? ch_dada2_asv.countLines()+","+QIIME2_FILTERTAXA.out.tsv.countLines() : "", From 913d284b4c6dd54a20c9dbe273656c30df2888f3 Mon Sep 17 00:00:00 2001 From: Matthew Date: Fri, 1 Dec 2023 14:13:39 -0500 Subject: [PATCH 034/104] Improve error message clarity for ill-formed file. Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- subworkflows/local/qiime2_preptax.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 561a75dc..7a5960af 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -27,7 +27,7 @@ workflow QIIME2_PREPTAX { decompressed: it.isFile() && ( it.getName().endsWith(".fna") || it.getName().endsWith(".tax") ) failed: true }.set { ch_qiime_ref_tax_branched } - ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed or decompressed sequence or taxonomy file. Please review input." } + ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed (ends with `.gz`) or decompressed sequence (ends with `.fna`) or taxonomy file (ends with `.tax`). Please review input." 
} GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.compressed) ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) From 330bf43e19cf42f87f9a9213928724fe42cbc98c Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Fri, 1 Dec 2023 14:34:05 -0500 Subject: [PATCH 035/104] Fix typo in error on --qiime_ref_paths form. --- workflows/ampliseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 9195038d..faeffec6 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -63,7 +63,7 @@ if (params.qiime_ref_tax_custom) { if ("${params.qiime_ref_tax_custom}".contains(",")) { qiime_ref_paths = "${params.qiime_ref_tax_custom}".split(",") if (qiime_ref_paths.length != 2) { - error "--qiime_ref_tax_custom accepts a single filepath to a directory or tarball, or two files paths separated by a comma. Please review input." + error "--qiime_ref_tax_custom accepts a single filepath to a directory or tarball, or two filepaths separated by a comma. Please review input." } ch_qiime_ref_taxonomy = Channel.fromPath(Arrays.asList(qiime_ref_paths), checkIfExists: true) From b8c595cc7a647593656945cc34c55861986c2038 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Fri, 1 Dec 2023 14:41:10 -0500 Subject: [PATCH 036/104] Remove unneeded collect from FORMAT_TAXONOMY_QIIME invocation. 
--- subworkflows/local/qiime2_preptax.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 561a75dc..d1ed888a 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -72,7 +72,7 @@ workflow QIIME2_PREPTAX { ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) } } else { - FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy.collect() ) + FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) } From 3e05fe507674b72cc1757063fb4621f47defd4d9 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Fri, 1 Dec 2023 14:47:32 -0500 Subject: [PATCH 037/104] Improve version tracking in PREPTAX. --- subworkflows/local/qiime2_preptax.nf | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index d1ed888a..050ad78c 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -53,6 +53,8 @@ workflow QIIME2_PREPTAX { def meta = [:] meta.id = val_qiime_ref_taxonomy [ meta, db ] } ) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(UNTAR.out.versions) + ch_qiime_db_dir = UNTAR.out.untar.map{ it[1] } ch_qiime_db_dir = ch_qiime_db_dir.mix(ch_qiime_ref_tax_branched.dir) @@ -73,6 +75,7 @@ workflow QIIME2_PREPTAX { } } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) + ch_qiime2_preptax_versions(FORMAT_TAXONOMY_QIIME.out.versions) ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) } @@ -85,10 +88,14 @@ workflow QIIME2_PREPTAX { meta.RV_primer = RV_primer [ meta, db ] } .set { ch_ref_database } + QIIME2_EXTRACT ( ch_ref_database ) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(QIIME2_EXTRACT.out.versions) + QIIME2_TRAIN ( QIIME2_EXTRACT.out.qza ) + 
ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(QIIME2_TRAIN.out.versions) emit: - classifier = QIIME2_TRAIN.out.qza - versions = QIIME2_TRAIN.out.versions + classifier = QIIME2_TRAIN.out.qza + versions = ch_qiime2_preptax_versions } From b012aeb0792d86f5edcd1dc81b34049fc7b7b16d Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Fri, 1 Dec 2023 15:11:18 -0500 Subject: [PATCH 038/104] Add qiime_ref_tax_custom to testing in reftaxcustom nf-test. --- conf/test_reftaxcustom.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config index 4233d1ea..c2d7c4ee 100644 --- a/conf/test_reftaxcustom.config +++ b/conf/test_reftaxcustom.config @@ -30,6 +30,7 @@ params { dada_assign_taxlevels = "Kingdom,Phylum,Class,Order,Family,Genus" kraken2_ref_tax_custom = "https://genome-idx.s3.amazonaws.com/kraken/16S_Greengenes13.5_20200326.tgz" kraken2_assign_taxlevels = "D,P,C,O" + qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz" // Skip downstream analysis with QIIME2 skip_qiime = true From 7ce4fa815d297986b9837a3349c9451b49b74073 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Tue, 5 Dec 2023 12:02:21 +0000 Subject: [PATCH 039/104] Patch summary_report as it expects qiime_ref_taxonomy is set even when it can be null due to classifier being passed. --- modules/local/summary_report.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index d886f19b..a8e082b0 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -118,7 +118,7 @@ process SUMMARY_REPORT { kraken2_tax ? "kraken2_taxonomy='$kraken2_tax',kraken2_confidence='$params.kraken2_confidence'" : "", kraken2_tax && !params.kraken2_ref_tax_custom ? 
"kraken2_ref_tax_title='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["title"]}',kraken2_ref_tax_file='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["file"]}',kraken2_ref_tax_citation='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["citation"]}'" : "", pplace_tax ? "pplace_taxonomy='$pplace_tax',pplace_heattree='$pplace_heattree'" : "", - qiime2_tax ? "qiime2_taxonomy='$qiime2_tax',qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : "", + qiime2_tax && params.qiime_ref_taxonomy ? "qiime2_taxonomy='$qiime2_tax',qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : qiime2_tax ? "qiime2_taxonomy='$qiime2_tax'" : "", run_qiime2 ? "val_used_taxonomy='$val_used_taxonomy'" : "", filter_stats_tsv ? "filter_stats_tsv='$filter_stats_tsv',qiime2_filtertaxa='$qiime2_filtertaxa',exclude_taxa='$params.exclude_taxa',min_frequency='$params.min_frequency',min_samples='$params.min_samples'" : "", barplot ? "barplot=TRUE" : "", From d83deb097a671bef2f947b1f8e73e6a17ae4471f Mon Sep 17 00:00:00 2001 From: Dan Clayton Date: Wed, 6 Dec 2023 15:23:11 +0000 Subject: [PATCH 040/104] update output docs for collapsed abundance tables --- docs/output.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index f12fc41f..62433cea 100644 --- a/docs/output.md +++ b/docs/output.md @@ -407,7 +407,7 @@ All following analysis is based on these filtered tables. - `seven_number_summary.tsv`: Length of ASV sequences in different quantiles. 
- `filtered-sequences.qza`: QIIME2 fragment. - `qiime2/abundance_tables/` - - `abs-abund-table-*.tsv`: Tab-separated absolute abundance table at taxa level `*`, where `*` ranges by default from 2 to 6 or 7, depending on the used reference taxonomy database. + - `abs-abund-table-*.tsv`: Tab-separated absolute abundance table at taxa level `*`, where `*` ranges by default from 2 to 6, specified by the `--tax_agglom_min` and `--tax_agglom_max` parameters. - `count_table_filter_stats.tsv`: Tab-separated table with information on how much counts were filtered for each sample. - `feature-table.biom`: Abundance table in biom format for importing into downstream analysis tools. - `feature-table.tsv`: Tab-separated abundance table for each ASV and each sample. @@ -423,7 +423,7 @@ Absolute abundance tables produced by the previous steps contain count data, but Output files - `qiime2/rel_abundance_tables/` - - `rel-table-*.tsv`: Tab-separated absolute abundance table at taxa level `*`, where `*` ranges by default from 2 to 6 or 7, depending on the used reference taxonomy database. + - `rel-table-*.tsv`: Tab-separated absolute abundance table at taxa level `*`, where `*` ranges by default from 2 to 6, specified by the `--tax_agglom_min` and `--tax_agglom_max` parameters. - `rel-table-ASV.tsv`: Tab-separated relative abundance table for all ASVs. - `rel-table-ASV_with-DADA2-tax.tsv`: Tab-separated table for all ASVs with DADA2 taxonomic classification, sequence and relative abundance. - `rel-table-ASV_with-QIIME2-tax.tsv`: Tab-separated table for all ASVs with QIIME2 taxonomic classification, sequence and relative abundance. 
From d8ea1f03225d237161a95f420416056407f558aa Mon Sep 17 00:00:00 2001 From: Dan Clayton Date: Thu, 7 Dec 2023 09:20:58 +0000 Subject: [PATCH 041/104] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 620c9716..e51badb3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` ### `Fixed` +- [#672](https://github.com/nf-core/ampliseq/pull/672) - Update output docs for collapsed abundance tables ### `Dependencies` From feacdd59242c8af026a4ded19447c6c3b724d19e Mon Sep 17 00:00:00 2001 From: Dan Clayton Date: Thu, 7 Dec 2023 11:42:31 +0000 Subject: [PATCH 042/104] run prettier --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e51badb3..b0879526 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` ### `Fixed` + - [#672](https://github.com/nf-core/ampliseq/pull/672) - Update output docs for collapsed abundance tables ### `Dependencies` From a8ec74a2f10ca3d79f5384f5078838fcde1d74b5 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 15:40:10 +0000 Subject: [PATCH 043/104] Update changelog. 
--- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0879526..7639cf7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` - [#672](https://github.com/nf-core/ampliseq/pull/672) - Update output docs for collapsed abundance tables +- []() - Fix logic relating to generation of qiime2 taxonomy part of summary report ### `Dependencies` From ba71667c714ff22258bc7de7f0126dda8e8bcfe0 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 15:41:55 +0000 Subject: [PATCH 044/104] Don't skip qiime while testing. --- conf/test_reftaxcustom.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config index c2d7c4ee..870a59e2 100644 --- a/conf/test_reftaxcustom.config +++ b/conf/test_reftaxcustom.config @@ -33,5 +33,5 @@ params { qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz" // Skip downstream analysis with QIIME2 - skip_qiime = true + skip_qiime = false } From 1360415a39ae3aaf659a8201ef1958a1fb53414e Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:00:27 +0000 Subject: [PATCH 045/104] Add a skip option for just downstream qiime analysis, but still perform qiime taxonomic classification. 
--- nextflow.config | 1 + nextflow_schema.json | 4 ++++ workflows/ampliseq.nf | 13 ++++++++++--- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/nextflow.config b/nextflow.config index c4e94bfb..c76b4058 100644 --- a/nextflow.config +++ b/nextflow.config @@ -87,6 +87,7 @@ params { skip_dada_quality = false skip_barrnap = false skip_qiime = false + skip_qiime_downstream = false skip_fastqc = false skip_alpha_rarefaction = false skip_abundance_tables = false diff --git a/nextflow_schema.json b/nextflow_schema.json index c6c6f8b0..2b4a8dca 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -656,6 +656,10 @@ "type": "boolean", "description": "Skip all steps that are executed by QIIME2, including QIIME2 software download, taxonomy assignment by QIIME2, barplots, relative abundance tables, diversity analysis, differential abundance testing." }, + "skip_qiime_downstream": { + "type": "boolean", + "description": "Skip steps that are executed by QIIME2 except for taxonomic classification, including barplots, relative abundance tables, diversity analysis, differential abundance testing." + }, "skip_taxonomy": { "type": "boolean", "description": "Skip taxonomic classification. 
Incompatible with `--sbdiexport`" diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index faeffec6..9e85bf6a 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -146,8 +146,15 @@ if ( params.dada_ref_taxonomy && !params.skip_dada_addspecies && !params.skip_da } } -//only run QIIME2 when taxonomy is actually calculated and all required data is available -if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) { +// Only run QIIME2 taxonomy classification if needed parameters are passed and we are not skipping taxonomy or qiime steps. +if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && (params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier) ) { + run_qiime2_taxonomy = true +} else { + run_qiime2_taxonomy = false +} + +//only run QIIME2 downstream analysis when taxonomy is actually calculated and all required data is available +if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && !params.skip_qiime_downstream && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom) ) { run_qiime2 = true } else { run_qiime2 = false @@ -567,7 +574,7 @@ workflow AMPLISEQ { } //QIIME2 - if ( run_qiime2 ) { + if ( run_qiime2_taxonomy ) { if ((params.qiime_ref_taxonomy || params.qiime_ref_tax_custom) && !params.classifier) { QIIME2_PREPTAX ( ch_qiime_ref_taxonomy.collect(), From f4f5cda41b32a83c133b6678f8c8f4537d9b65ed Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:02:06 +0000 
Subject: [PATCH 046/104] Skip qiime downstream in reftaxcustom. --- conf/test_reftaxcustom.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config index 870a59e2..ea8a7c6d 100644 --- a/conf/test_reftaxcustom.config +++ b/conf/test_reftaxcustom.config @@ -33,5 +33,5 @@ params { qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz" // Skip downstream analysis with QIIME2 - skip_qiime = false + skip_qiime_downstream = true } From 549c166365bf68f4edecff6121597cbcb01c8b99 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:21:06 +0000 Subject: [PATCH 047/104] Fix path for testing tarball passed to --qiime_ref_tax_custom. --- conf/test_reftaxcustom.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config index ea8a7c6d..1afe1c2d 100644 --- a/conf/test_reftaxcustom.config +++ b/conf/test_reftaxcustom.config @@ -30,7 +30,7 @@ params { dada_assign_taxlevels = "Kingdom,Phylum,Class,Order,Family,Genus" kraken2_ref_tax_custom = "https://genome-idx.s3.amazonaws.com/kraken/16S_Greengenes13.5_20200326.tgz" kraken2_assign_taxlevels = "D,P,C,O" - qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz" + qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tar.gz" // Skip downstream analysis with QIIME2 skip_qiime_downstream = true From 851653404641128a88899ef1e3ceb88e79c945a9 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:38:04 +0000 Subject: [PATCH 048/104] Add snapshot of files coming from qiime2 taxonomy. 
--- tests/pipeline/reftaxcustom.nf.test.snap | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 7dca4e3e..842b18de 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -43,12 +43,19 @@ "timestamp": "2023-05-28T21:18:54+0000" }, "kraken2": { + "content": [ + "taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,e3d3758fb6fb6b1d8e43dd1be578979b", + "taxonomy/taxonomy.tsv:md5,ad6793c7689b0a5e40169d2799564645" + ], + "timestamp": "2023-09-15T21:16:26+0000" + }, + "qiime2": { "content": [ "ASV_tax.user.kraken2.classifiedreads.txt:md5,8a4693c37d5c24b342ef161b92567764", "ASV_tax.user.kraken2.complete.tsv:md5,3613dac9ce1bf03f87b57d1523e705f1", "ASV_tax.user.kraken2.tsv:md5,95c3f9daa5da8fe00159fb07d394c3ce" ], - "timestamp": "2023-09-15T21:16:26+0000" + "timestamp": "2023-12-07T21:28:32+0000" }, "multiqc": { "content": [ From 745cab7de07628c06f5356dcdb9f8e64321bd074 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:39:18 +0000 Subject: [PATCH 049/104] Work towards a qiime_ref_tax_custom specific test. 
--- .github/workflows/ci.yml | 1 + conf/test_qiimecustom.config | 32 ++++++++++++++ nextflow.config | 1 + tests/pipeline/qiimecustom.nf.test | 55 +++++++++++++++++++++++++ tests/pipeline/qiimecustom.nf.test.snap | 43 +++++++++++++++++++ 5 files changed, 132 insertions(+) create mode 100644 conf/test_qiimecustom.config create mode 100644 tests/pipeline/qiimecustom.nf.test create mode 100644 tests/pipeline/qiimecustom.nf.test.snap diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 788582d9..e4b532be 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,6 +50,7 @@ jobs: - "test_failed" - "test_multi" - "test_reftaxcustom" + - "test_qiimecustom" - "test_doubleprimers" - "test_iontorrent" - "test_novaseq" diff --git a/conf/test_qiimecustom.config b/conf/test_qiimecustom.config new file mode 100644 index 00000000..ea6b97d8 --- /dev/null +++ b/conf/test_qiimecustom.config @@ -0,0 +1,32 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/ampliseq -profile test_qiimecustom,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test custom QIIME2 reference taxonomy database profile' + config_profile_description = 'Minimal test dataset to check --qiime_ref_tax_custom' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + FW_primer = "GTGYCAGCMGCCGCGGTAA" + RV_primer = "GGACTACNVGGGTWTCTAAT" + input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv" + + // Custom reference taxonomy + qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz,https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.fna.gz" + + // Skip downstream analysis with QIIME2 + skip_qiime_downstream = true +} diff --git a/nextflow.config b/nextflow.config index c76b4058..831a43a2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -274,6 +274,7 @@ profiles { test_failed { includeConfig 'conf/test_failed.config' } test_full { includeConfig 'conf/test_full.config' } test_reftaxcustom { includeConfig 'conf/test_reftaxcustom.config' } + test_qiimecustom { includeConfig 'conf/test_qiimecustom.config' } test_novaseq { includeConfig 'conf/test_novaseq.config' } test_pplace { includeConfig 'conf/test_pplace.config' } test_sintax { includeConfig 'conf/test_sintax.config' } diff --git a/tests/pipeline/qiimecustom.nf.test b/tests/pipeline/qiimecustom.nf.test new file mode 100644 index 00000000..abd2a38a --- /dev/null +++ b/tests/pipeline/qiimecustom.nf.test @@ -0,0 +1,55 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" + script "main.nf" + tag "test_reftaxcustom" + tag "dada2" + tag "pipeline" + + test("Custom DADA2 Reference Taxonomy Database") { + + when { + params { + 
outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(UTILS.removeNextflowVersion("$outputDir")).match("software_versions") }, + { assert snapshot(path("$outputDir/overall_summary.tsv")).match("overall_summary_tsv") }, + { assert snapshot(path("$outputDir/barrnap/rrna.arc.gff"), + path("$outputDir/barrnap/rrna.bac.gff"), + path("$outputDir/barrnap/rrna.euk.gff"), + path("$outputDir/barrnap/rrna.mito.gff")).match("barrnap") }, + { assert new File("$outputDir/barrnap/summary.tsv").exists() }, + { assert snapshot(path("$outputDir/cutadapt/cutadapt_summary.tsv")).match("cutadapt") }, + { assert snapshot(path("$outputDir/dada2/ASV_seqs.fasta"), + path("$outputDir/dada2/ASV_table.tsv"), + path("$outputDir/dada2/DADA2_stats.tsv"), + path("$outputDir/dada2/DADA2_table.rds"), + path("$outputDir/dada2/DADA2_table.tsv")).match("dada2") }, + { assert new File("$outputDir/dada2/ASV_tax.user.tsv").exists() }, + { assert new File("$outputDir/dada2/ASV_tax_species.user.tsv").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1_2_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1a_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_1a_2_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2_2_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2a_1_fastqc.html").exists() }, + { assert new File("$outputDir/fastqc/sampleID_2a_2_fastqc.html").exists() }, + { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, + { assert snapshot(path("$outputDir/kraken2/ASV_tax.user.kraken2.classifiedreads.txt"), + path("$outputDir/kraken2/ASV_tax.user.kraken2.complete.tsv"), + path("$outputDir/kraken2/ASV_tax.user.kraken2.tsv")).match("kraken2") }, + { assert 
snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/summary_report/summary_report.html").exists() }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + ) + } + } +} diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap new file mode 100644 index 00000000..680ca37a --- /dev/null +++ b/tests/pipeline/qiimecustom.nf.test.snap @@ -0,0 +1,43 @@ +{ + "input": { + "content": [ + "Samplesheet.tsv:md5,dbf8d1a2b7933dab9e5a139f33c2b1f4" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "cutadapt": { + "content": [ + "cutadapt_summary.tsv:md5,5d02749984a811479e7d534fda75163f" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "software_versions": { + "content": [ + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "overall_summary_tsv": { + "content": [ + "overall_summary.tsv:md5,3231d6ee72b9a1e7742e5605caaff05a" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "barrnap": { + "content": [ + "rrna.arc.gff:md5,6dae470aace9293d5eb8c318584852dd", + "rrna.bac.gff:md5,439a9084f089120f700f938dfb58fa41", + "rrna.euk.gff:md5,c9bc1d9d8fb77dc19c95dee2d53840eb", + "rrna.mito.gff:md5,df19e1b84ba6f691d20c72b397c88abf" + ], + "timestamp": "2023-05-28T21:18:54+0000" + }, + "multiqc": { + "content": [ + 
"multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", + "multiqc_general_stats.txt:md5,88c2b9e6d02b83afe4f9551e6c9a91a7", + "multiqc_cutadapt.txt:md5,330a7b72dc671ca99fcb3fb84b6776c1" + ], + "timestamp": "2023-05-28T21:18:54+0000" + } +} From a1dfb5b1b6943fc244a58ea701ba50cd085ff2dc Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:45:15 +0000 Subject: [PATCH 050/104] Skip dada tax. --- conf/test_qiimecustom.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/test_qiimecustom.config b/conf/test_qiimecustom.config index ea6b97d8..2233070c 100644 --- a/conf/test_qiimecustom.config +++ b/conf/test_qiimecustom.config @@ -29,4 +29,5 @@ params { // Skip downstream analysis with QIIME2 skip_qiime_downstream = true + skip_dada_taxonomy = true } From 51dc97e82770417a5179abff1f50ae09c00ca71a Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:45:54 +0000 Subject: [PATCH 051/104] Sequence then taxonomy file for file pair to --qiime_ref_tax_custom. 
--- conf/test_qiimecustom.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/test_qiimecustom.config b/conf/test_qiimecustom.config index 2233070c..2fc9cb73 100644 --- a/conf/test_qiimecustom.config +++ b/conf/test_qiimecustom.config @@ -25,7 +25,7 @@ params { input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv" // Custom reference taxonomy - qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz,https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.fna.gz" + qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.fna.gz,https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz" // Skip downstream analysis with QIIME2 skip_qiime_downstream = true From a33f17f7937769b43e9a3e9fb5c480cc115b67a3 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 21:49:21 +0000 Subject: [PATCH 052/104] Clarify in help text of --qiime_ref_tax_custom the ordering of a file pair. --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 62c54f79..6ccfc3ad 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -434,7 +434,7 @@ }, "qiime_ref_tax_custom": { "type": "string", - "help_text": "Is preferred over `--qiime_ref_taxonomy`. A comma separated pair of (possibly gzipped) filepaths.", + "help_text": "Is preferred over `--qiime_ref_taxonomy`. 
A comma separated pair of (possibly gzipped) filepaths (sequence, taxonomy).", "description": "Path to files of a custom QIIME2 reference taxonomy database (files may be gzipped)" }, "classifier": { From 8f57faec61a65a422c93c4cb6526ff3d6abcb65c Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 22:00:26 +0000 Subject: [PATCH 053/104] Update snapshots to include qiime2 in both correctly and add assertions for qiime2. --- tests/pipeline/qiimecustom.nf.test | 11 +++++------ tests/pipeline/qiimecustom.nf.test.snap | 7 +++++++ tests/pipeline/reftaxcustom.nf.test | 2 ++ tests/pipeline/reftaxcustom.nf.test.snap | 10 +++++----- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/tests/pipeline/qiimecustom.nf.test b/tests/pipeline/qiimecustom.nf.test index abd2a38a..8ec67571 100644 --- a/tests/pipeline/qiimecustom.nf.test +++ b/tests/pipeline/qiimecustom.nf.test @@ -2,11 +2,11 @@ nextflow_pipeline { name "Test Workflow main.nf" script "main.nf" - tag "test_reftaxcustom" - tag "dada2" + tag "test_qiimecustom" + tag "qiime2" tag "pipeline" - test("Custom DADA2 Reference Taxonomy Database") { + test("Custom QIIME2 Reference Taxonomy Database") { when { params { @@ -41,9 +41,8 @@ nextflow_pipeline { { assert new File("$outputDir/fastqc/sampleID_2a_1_fastqc.html").exists() }, { assert new File("$outputDir/fastqc/sampleID_2a_2_fastqc.html").exists() }, { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, - { assert snapshot(path("$outputDir/kraken2/ASV_tax.user.kraken2.classifiedreads.txt"), - path("$outputDir/kraken2/ASV_tax.user.kraken2.complete.tsv"), - path("$outputDir/kraken2/ASV_tax.user.kraken2.tsv")).match("kraken2") }, + { assert snapshot(path("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza"), + path("$outputDir/qiime2/taxonomy/taxonomy.tsv")).match("qiime2") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), 
path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap index 680ca37a..616e1de0 100644 --- a/tests/pipeline/qiimecustom.nf.test.snap +++ b/tests/pipeline/qiimecustom.nf.test.snap @@ -32,6 +32,13 @@ ], "timestamp": "2023-05-28T21:18:54+0000" }, + "qiime2": { + "content": [ + "GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,b744a656dbd4e710697bf9ee47f26c87", + "taxonomy.tsv:md5,44585412583f0cf5f2b82a1337f16756" + ], + "timestamp": "2023-12-07T21:28:32+0000" + }, "multiqc": { "content": [ "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", diff --git a/tests/pipeline/reftaxcustom.nf.test b/tests/pipeline/reftaxcustom.nf.test index abd2a38a..3f72ec5f 100644 --- a/tests/pipeline/reftaxcustom.nf.test +++ b/tests/pipeline/reftaxcustom.nf.test @@ -44,6 +44,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/kraken2/ASV_tax.user.kraken2.classifiedreads.txt"), path("$outputDir/kraken2/ASV_tax.user.kraken2.complete.tsv"), path("$outputDir/kraken2/ASV_tax.user.kraken2.tsv")).match("kraken2") }, + { assert snapshot(path("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza"), + path("$outputDir/qiime2/taxonomy/taxonomy.tsv")).match("qiime2") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 842b18de..90b157ec 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -44,16 +44,16 @@ }, "kraken2": { "content": [ - 
"taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,e3d3758fb6fb6b1d8e43dd1be578979b", - "taxonomy/taxonomy.tsv:md5,ad6793c7689b0a5e40169d2799564645" + "ASV_tax.user.kraken2.classifiedreads.txt:md5,8a4693c37d5c24b342ef161b92567764", + "ASV_tax.user.kraken2.complete.tsv:md5,3613dac9ce1bf03f87b57d1523e705f1", + "ASV_tax.user.kraken2.tsv:md5,95c3f9daa5da8fe00159fb07d394c3ce" ], "timestamp": "2023-09-15T21:16:26+0000" }, "qiime2": { "content": [ - "ASV_tax.user.kraken2.classifiedreads.txt:md5,8a4693c37d5c24b342ef161b92567764", - "ASV_tax.user.kraken2.complete.tsv:md5,3613dac9ce1bf03f87b57d1523e705f1", - "ASV_tax.user.kraken2.tsv:md5,95c3f9daa5da8fe00159fb07d394c3ce" + "GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,e3d3758fb6fb6b1d8e43dd1be578979b", + "taxonomy.tsv:md5,ad6793c7689b0a5e40169d2799564645" ], "timestamp": "2023-12-07T21:28:32+0000" }, From 74e05b2a26208befa00fd2bad63cc9fd5f6d97de Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 22:02:21 +0000 Subject: [PATCH 054/104] Make ordering of sequence and taxonomy files deterministic in case of file pair. --- subworkflows/local/qiime2_preptax.nf | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 9d5c6898..97ccba63 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -35,7 +35,14 @@ workflow QIIME2_PREPTAX { ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) - ch_ref_database = ch_qiime_db_files.collate(2) + ch_ref_database_fna = ch_qiime_db_dir.filter { + it.getName().endsWith(".fna") + } + ch_ref_database_tax = ch_qiime_db_dir.filter { + it.getName().endsWith(".tax") + } + + ch_ref_database = ch_ref_database_fna.combine(ch_ref_database_tax) // Handle case we have been provided a single filepath (tarball or directory). 
} else { ch_qiime_ref_taxonomy.flatten() From b65df44c9ee053896666e9ccbee9bdc7ac2c41f8 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 22:03:51 +0000 Subject: [PATCH 055/104] Fix filtering in file pair case. --- subworkflows/local/qiime2_preptax.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index 97ccba63..7d0be52d 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -35,10 +35,10 @@ workflow QIIME2_PREPTAX { ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) - ch_ref_database_fna = ch_qiime_db_dir.filter { + ch_ref_database_fna = ch_qiime_db_files.filter { it.getName().endsWith(".fna") } - ch_ref_database_tax = ch_qiime_db_dir.filter { + ch_ref_database_tax = ch_qiime_db_files.filter { it.getName().endsWith(".tax") } From 45bee719af1aba754a9bfbea274d5116204b0df7 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 22:31:07 +0000 Subject: [PATCH 056/104] Fix version mixing in --qiime_ref_taxonomy case. 
--- subworkflows/local/qiime2_preptax.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index d514f086..dfa28725 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -82,7 +82,7 @@ workflow QIIME2_PREPTAX { } } else { FORMAT_TAXONOMY_QIIME ( ch_qiime_ref_taxonomy ) - ch_qiime2_preptax_versions(FORMAT_TAXONOMY_QIIME.out.versions) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(FORMAT_TAXONOMY_QIIME.out.versions) ch_ref_database = FORMAT_TAXONOMY_QIIME.out.fasta.combine(FORMAT_TAXONOMY_QIIME.out.tax) } From 3c9eaf129c0dd311b4e8bbdbc8e047eb2519cefb Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 22:45:45 +0000 Subject: [PATCH 057/104] Update software version expectations for tests that no longer run QIIME_PREPTAX. --- tests/pipeline/doubleprimers.nf.test.snap | 2 +- tests/pipeline/multi.nf.test.snap | 2 +- tests/pipeline/qiimecustom.nf.test.snap | 2 +- tests/pipeline/reftaxcustom.nf.test.snap | 2 +- tests/pipeline/sintax.nf.test.snap | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/pipeline/doubleprimers.nf.test.snap b/tests/pipeline/doubleprimers.nf.test.snap index d7cc9dce..b5e9cb2b 100644 --- a/tests/pipeline/doubleprimers.nf.test.snap +++ b/tests/pipeline/doubleprimers.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + 
"{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-07-27T13:49:03+0000" }, diff --git a/tests/pipeline/multi.nf.test.snap b/tests/pipeline/multi.nf.test.snap index 2c0382f0..daba2601 100644 --- a/tests/pipeline/multi.nf.test.snap +++ b/tests/pipeline/multi.nf.test.snap @@ -14,7 +14,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:15:03+0000" }, diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap index 616e1de0..6c39df37 100644 --- a/tests/pipeline/qiimecustom.nf.test.snap +++ b/tests/pipeline/qiimecustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, 
CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 90b157ec..8fca1c5b 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, 
dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, diff --git a/tests/pipeline/sintax.nf.test.snap b/tests/pipeline/sintax.nf.test.snap index 069c7fa8..b19bf8fe 100644 --- a/tests/pipeline/sintax.nf.test.snap +++ b/tests/pipeline/sintax.nf.test.snap @@ -16,7 +16,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T16:40:18+0000" }, From 07f4407a4dcfc62be99f61e8a4ebaf6543caaf47 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 22:47:55 +0000 Subject: [PATCH 058/104] Remove assertions on dada2 tax and phyloseq files existing in test_qiimecustom. 
--- tests/pipeline/qiimecustom.nf.test | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/pipeline/qiimecustom.nf.test b/tests/pipeline/qiimecustom.nf.test index 8ec67571..2cdc080a 100644 --- a/tests/pipeline/qiimecustom.nf.test +++ b/tests/pipeline/qiimecustom.nf.test @@ -30,8 +30,6 @@ nextflow_pipeline { path("$outputDir/dada2/DADA2_stats.tsv"), path("$outputDir/dada2/DADA2_table.rds"), path("$outputDir/dada2/DADA2_table.tsv")).match("dada2") }, - { assert new File("$outputDir/dada2/ASV_tax.user.tsv").exists() }, - { assert new File("$outputDir/dada2/ASV_tax_species.user.tsv").exists() }, { assert new File("$outputDir/fastqc/sampleID_1_1_fastqc.html").exists() }, { assert new File("$outputDir/fastqc/sampleID_1_2_fastqc.html").exists() }, { assert new File("$outputDir/fastqc/sampleID_1a_1_fastqc.html").exists() }, @@ -46,8 +44,7 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/summary_report/summary_report.html").exists() }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + { assert new File("$outputDir/summary_report/summary_report.html").exists() } ) } } From 1c129e568cb2cbba9c11af01584763e3bca96dfe Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Thu, 7 Dec 2023 23:08:40 +0000 Subject: [PATCH 059/104] Looks like qiime2 tax alignment is non-deterministic, just verify the files it emits are emitted. 
--- tests/pipeline/qiimecustom.nf.test | 4 ++-- tests/pipeline/qiimecustom.nf.test.snap | 7 ------- tests/pipeline/reftaxcustom.nf.test | 4 ++-- tests/pipeline/reftaxcustom.nf.test.snap | 7 ------- 4 files changed, 4 insertions(+), 18 deletions(-) diff --git a/tests/pipeline/qiimecustom.nf.test b/tests/pipeline/qiimecustom.nf.test index 2cdc080a..49396815 100644 --- a/tests/pipeline/qiimecustom.nf.test +++ b/tests/pipeline/qiimecustom.nf.test @@ -39,8 +39,8 @@ nextflow_pipeline { { assert new File("$outputDir/fastqc/sampleID_2a_1_fastqc.html").exists() }, { assert new File("$outputDir/fastqc/sampleID_2a_2_fastqc.html").exists() }, { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, - { assert snapshot(path("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza"), - path("$outputDir/qiime2/taxonomy/taxonomy.tsv")).match("qiime2") }, + { assert new File("$outputDir/qiime2/taxonomy/taxonomy.tsv").exists() }, + { assert new File("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap index 6c39df37..594688a9 100644 --- a/tests/pipeline/qiimecustom.nf.test.snap +++ b/tests/pipeline/qiimecustom.nf.test.snap @@ -32,13 +32,6 @@ ], "timestamp": "2023-05-28T21:18:54+0000" }, - "qiime2": { - "content": [ - "GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,b744a656dbd4e710697bf9ee47f26c87", - "taxonomy.tsv:md5,44585412583f0cf5f2b82a1337f16756" - ], - "timestamp": "2023-12-07T21:28:32+0000" - }, "multiqc": { "content": [ "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", diff --git a/tests/pipeline/reftaxcustom.nf.test 
b/tests/pipeline/reftaxcustom.nf.test index 3f72ec5f..4e70861b 100644 --- a/tests/pipeline/reftaxcustom.nf.test +++ b/tests/pipeline/reftaxcustom.nf.test @@ -44,8 +44,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/kraken2/ASV_tax.user.kraken2.classifiedreads.txt"), path("$outputDir/kraken2/ASV_tax.user.kraken2.complete.tsv"), path("$outputDir/kraken2/ASV_tax.user.kraken2.tsv")).match("kraken2") }, - { assert snapshot(path("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza"), - path("$outputDir/qiime2/taxonomy/taxonomy.tsv")).match("qiime2") }, + { assert new File("$outputDir/qiime2/taxonomy/taxonomy.tsv").exists() }, + { assert new File("$outputDir/qiime2/taxonomy/GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 8fca1c5b..b5aa10f1 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -50,13 +50,6 @@ ], "timestamp": "2023-09-15T21:16:26+0000" }, - "qiime2": { - "content": [ - "GTGYCAGCMGCCGCGGTAA-GGACTACNVGGGTWTCTAAT-classifier.qza:md5,e3d3758fb6fb6b1d8e43dd1be578979b", - "taxonomy.tsv:md5,ad6793c7689b0a5e40169d2799564645" - ], - "timestamp": "2023-12-07T21:28:32+0000" - }, "multiqc": { "content": [ "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", From 04af15445a4aaae6f53ac17790c3be49b0e7e1b6 Mon Sep 17 00:00:00 2001 From: Matthew Date: Fri, 8 Dec 2023 14:18:19 +0000 Subject: [PATCH 060/104] Update CHANGELOG.md Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7639cf7c..14502e4f 
100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` - [#672](https://github.com/nf-core/ampliseq/pull/672) - Update output docs for collapsed abundance tables -- []() - Fix logic relating to generation of qiime2 taxonomy part of summary report +- [#673](https://github.com/nf-core/ampliseq/pull/673) - Fix logic relating to generation of qiime2 taxonomy part of summary report ### `Dependencies` From 2ace59599e1c0c5ed19ffabf03ce2adb5a34c428 Mon Sep 17 00:00:00 2001 From: Matthew Date: Fri, 8 Dec 2023 14:26:03 +0000 Subject: [PATCH 061/104] Make --skip_qiime_downstream help text clearer. Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 6ccfc3ad..14eef4b4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -658,7 +658,7 @@ }, "skip_qiime_downstream": { "type": "boolean", - "description": "Skip steps that are executed by QIIME2 except for taxonomic classification, including barplots, relative abundance tables, diversity analysis, differential abundance testing." + "description": "Skip steps that are executed by QIIME2 except for taxonomic classification. Skip steps including barplots, relative abundance tables, diversity analysis, differential abundance testing." }, "skip_taxonomy": { "type": "boolean", From 4464c38cef7be3e9309c3d036fda7172aba130a4 Mon Sep 17 00:00:00 2001 From: Matthew Marshall Date: Fri, 8 Dec 2023 14:48:12 +0000 Subject: [PATCH 062/104] Remove assertion on qiime phyloseq file no longer produced. 
--- tests/pipeline/pplace.nf.test | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/pipeline/pplace.nf.test b/tests/pipeline/pplace.nf.test index 564cf2b9..b0507df7 100644 --- a/tests/pipeline/pplace.nf.test +++ b/tests/pipeline/pplace.nf.test @@ -56,8 +56,7 @@ nextflow_pipeline { { assert new File("$outputDir/pplace/test_pplace.graft.test_pplace.epa_result.newick").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/summary_report/summary_report.html").exists() }, - { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() } + { assert new File("$outputDir/summary_report/summary_report.html").exists() } ) } } From f1a86851942218bd72c33c0358b784c1e8207f4f Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 16:37:09 +0100 Subject: [PATCH 063/104] Move midori to get dbs in alphabetical order --- conf/ref_databases.config | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/conf/ref_databases.config b/conf/ref_databases.config index c80820ec..4ac7d02c 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -25,22 +25,6 @@ params { fmtscript = "taxref_reformat_coidb.sh" dbversion = "COIDB 221216 (https://doi.org/10.17044/scilifelab.20514192.v2)" } - 'midori2-co1' { - title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" - file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] - citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." 
- fmtscript = "taxref_reformat_midori2.sh" - dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" - taxlevels = "Phylum,Class,Order,Family,Genus,Species" - } - 'midori2-co1=gb250' { - title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" - file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] - citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." - fmtscript = "taxref_reformat_midori2.sh" - dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" - taxlevels = "Phylum,Class,Order,Family,Genus,Species" - } 'gtdb' { title = "GTDB - Genome Taxonomy Database - Release R08-RS214.1" file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/genomic_files_reps/bac120_ssu_reps_r214.tar.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/genomic_files_reps/ar53_ssu_reps_r214.tar.gz" ] @@ -76,6 +60,22 @@ params { fmtscript = "taxref_reformat_gtdb.sh" dbversion = "GTDB R05-RS95 (https://data.ace.uq.edu.au/public/gtdb/data/releases/release95/95.0/)" } + 'midori2-co1' { + title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" + file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] + citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." 
+ fmtscript = "taxref_reformat_midori2.sh" + dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" + taxlevels = "Phylum,Class,Order,Family,Genus,Species" + } + 'midori2-co1=gb250' { + title = "MIDORI2 - CO1 Taxonomy Database - Release GB250" + file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ] + citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245." + fmtscript = "taxref_reformat_midori2.sh" + dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" + taxlevels = "Phylum,Class,Order,Family,Genus,Species" + } 'pr2' { title = "PR2 - Protist Reference Ribosomal Database - Version 5.0.0" file = [ "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_dada2.fasta.gz", "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_UTAX.fasta.gz" ] From 54debbf2d49da6900c7a8e2302520c6007a40670 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 17:13:56 +0100 Subject: [PATCH 064/104] Sort dbs in alphabetical order in schema --- nextflow_schema.json | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 1d3098da..27e524c3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -335,36 +335,36 @@ "description": "Name of supported database, and optionally also version number", "default": "silva=138", "enum": [ - "midori2-co1=gb250", - "midori2-co1", + "coidb", + "coidb=221216", + "gtdb", "gtdb=R05-RS95", "gtdb=R06-RS202", 
"gtdb=R07-RS207", "gtdb=R08-RS214", - "gtdb", - "coidb", - "coidb=221216", - "pr2=5.0.0", - "pr2=4.14.0", - "pr2=4.13.0", + "midori2-co1", + "midori2-co1=gb250", "pr2", - "rdp=18", + "pr2=4.13.0", + "pr2=4.14.0", + "pr2=5.0.0", "rdp", + "rdp=18", "sbdi-gtdb", - "sbdi-gtdb=R07-RS207-1", - "sbdi-gtdb=R06-RS202-3", "sbdi-gtdb=R06-RS202-1", + "sbdi-gtdb=R06-RS202-3", + "sbdi-gtdb=R07-RS207-1", + "silva", "silva=132", "silva=138", - "silva", - "unite-fungi=9.0", - "unite-fungi=8.3", - "unite-fungi=8.2", - "unite-fungi", - "unite-alleuk=9.0", - "unite-alleuk=8.3", + "unite-alleuk", "unite-alleuk=8.2", - "unite-alleuk" + "unite-alleuk=8.3", + "unite-alleuk=9.0", + "unite-fungi", + "unite-fungi=8.2", + "unite-fungi=8.3", + "unite-fungi=9.0" ] }, "dada_ref_tax_custom": { From fac3d731df0efc94d64323e6f7d171d1d60cd4f4 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 17:22:51 +0100 Subject: [PATCH 065/104] Spelling mistake --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 62433cea..c2b93537 100644 --- a/docs/output.md +++ b/docs/output.md @@ -140,7 +140,7 @@ DADA2 reduces sequence errors and dereplicates sequences by quality filtering, d - `ASV_table.tsv`: Counts for each ASV sequence. - `DADA2_stats.tsv`: Tracking read numbers through DADA2 processing steps, for each sample. - `DADA2_table.rds`: DADA2 ASV table as R object. - - `DADA2_tables.tsv`: DADA2 ASV table. + - `DADA2_table.tsv`: DADA2 ASV table. - `dada2/args/`: Directory containing files with all parameters for DADA2 steps. - `dada2/log/`: Directory containing log files for DADA2 steps. 
- `dada2/QC/` From f61d94547539f20c6ac26fb6bbba70022ea2fd32 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 17:23:08 +0100 Subject: [PATCH 066/104] Add PhytoRef taxonomy database for chloroplast 16S --- bin/taxref_reformat_phytoref.sh | 7 +++++++ conf/ref_databases.config | 8 ++++++++ nextflow_schema.json | 1 + 3 files changed, 16 insertions(+) create mode 100755 bin/taxref_reformat_phytoref.sh diff --git a/bin/taxref_reformat_phytoref.sh b/bin/taxref_reformat_phytoref.sh new file mode 100755 index 00000000..c61c081e --- /dev/null +++ b/bin/taxref_reformat_phytoref.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +# Write the assignTaxonomy() fasta file: assignTaxonomy.fna +cat PhytoRef_with_taxonomy.fasta | sed '/>/s/>[^|]*|/>/' | sed '/>/s/|/;/g' > assignTaxonomy.fna + +# Write the addSpecies() fasta file: addSpecies.fna +cat PhytoRef_with_taxonomy.fasta | sed '/^>/s/>\([^|]\+\)|.*|\([^|]\+\)/>\1 \2/' > addSpecies.fna diff --git a/conf/ref_databases.config b/conf/ref_databases.config index 4ac7d02c..65e0b9b1 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -76,6 +76,14 @@ params { dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)" taxlevels = "Phylum,Class,Order,Family,Genus,Species" } + 'phytoref' { + title = "PhytoRef plastid 16S rRNA database for photosynthetic eukaryotes" + file = [ "http://phytoref.sb-roscoff.fr/static/downloads/PhytoRef_with_taxonomy.fasta" ] + citation = "Decelle, Johan, Sarah Romac, Rowena F. Stern, El Mahdi Bendif, Adriana Zingone, Stéphane Audic, Michael D. Guiry, et al. 2015. PhytoREF: A Reference Database of the Plastidial 16S rRNA Gene of Photosynthetic Eukaryotes with Curated Taxonomy. Molecular Ecology Resources 15 (6): 1435–45. https://doi.org/10.1111/1755-0998.12401." 
+ fmtscript = "taxref_reformat_phytoref.sh" + dbversion = "unknown" + taxlevels = "Domain,Supergroup,Subphylum,Class,Subclass,Order,Suborder,Family,Genus,Species" + } 'pr2' { title = "PR2 - Protist Reference Ribosomal Database - Version 5.0.0" file = [ "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_dada2.fasta.gz", "https://github.com/pr2database/pr2database/releases/download/v5.0.0/pr2_version_5.0.0_SSU_UTAX.fasta.gz" ] diff --git a/nextflow_schema.json b/nextflow_schema.json index 27e524c3..9494e8f4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -344,6 +344,7 @@ "gtdb=R08-RS214", "midori2-co1", "midori2-co1=gb250", + "phytoref", "pr2", "pr2=4.13.0", "pr2=4.14.0", From 58b0f7222e6409fdc1af4b085018de303376a3db Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 17:26:50 +0100 Subject: [PATCH 067/104] Update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0879526..3f31da02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- [#674](https://github.com/nf-core/ampliseq/pull/674) - Add PhytoRef database for DADA2 taxonomy assignment using `--dada_ref_taxonomy phytoref` + ### `Changed` ### `Fixed` From 836bb451d02842b3b6f435161e61707223e983af Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 19:19:24 +0100 Subject: [PATCH 068/104] Add Zehr lab nifH database --- bin/taxref_reformat_zehr-nifh.sh | 7 +++++++ conf/ref_databases.config | 16 ++++++++++++++++ nextflow_schema.json | 4 +++- 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100755 bin/taxref_reformat_zehr-nifh.sh diff --git a/bin/taxref_reformat_zehr-nifh.sh b/bin/taxref_reformat_zehr-nifh.sh new file mode 100755 index 00000000..86a8eb26 --- /dev/null +++ b/bin/taxref_reformat_zehr-nifh.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +# Write the 
assignTaxonomy() fasta file: assignTaxonomy.fna +cp *.fasta assignTaxonomy.fna + +# Write the addSpecies() fasta file: addSpecies.fna +cut -d, -f 2,6,7 *.csv | grep -v '^sequence,' | sed 's/\(.*\),\(.*\),\(.*\)/>\3 \2\n\1/' > addSpecies.fna diff --git a/conf/ref_databases.config b/conf/ref_databases.config index 65e0b9b1..6d0d7d3b 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -247,6 +247,22 @@ params { dbversion = "UNITE-alleuk v8.2 (https://doi.org/10.15156/BIO/786370)" shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34994569", "https://scilifelab.figshare.com/ndownloader/files/34994572"] } + 'zehr-nifh' { + title = "Zehr lab nifH database - version 2.5.0" + file = [ "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_v2.0.5.fasta", "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_phylum_v2.0.5.csv" ] + citation = "M. A. Moynihan & C. Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. http://doi.org/10.5281/zenodo.7996213" + fmtscript = "taxref_reformat_zehr-nifh.sh" + dbversion = "Zehr-nifH v. 2.5.0" + taxlevels = "Domain,Phylum,Class,Order,Family,Genus" + } + 'zehr-nifh=2.5.0' { + title = "Zehr lab nifH database - version 2.5.0" + file = [ "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_v2.0.5.fasta", "https://raw.githubusercontent.com/moyn413/nifHdada2/master/nifH_dada2_phylum_v2.0.5.csv" ] + citation = "M. A. Moynihan & C. Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. http://doi.org/10.5281/zenodo.7996213" + fmtscript = "taxref_reformat_zehr-nifh.sh" + dbversion = "Zehr-nifH v. 
2.5.0" + taxlevels = "Domain,Phylum,Class,Order,Family,Genus" + } } //QIIME2 taxonomic reference databases qiime_ref_databases { diff --git a/nextflow_schema.json b/nextflow_schema.json index 9494e8f4..d2e5faa9 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -365,7 +365,9 @@ "unite-fungi", "unite-fungi=8.2", "unite-fungi=8.3", - "unite-fungi=9.0" + "unite-fungi=9.0", + "zehr-nifh", + "zehr-nifh=2.5.0" ] }, "dada_ref_tax_custom": { From e4a09ce0d243a1016e18986d6ac3d094ad3c2235 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 19:23:52 +0100 Subject: [PATCH 069/104] CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f31da02..5a1a239f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#674](https://github.com/nf-core/ampliseq/pull/674) - Add PhytoRef database for DADA2 taxonomy assignment using `--dada_ref_taxonomy phytoref` +- [#675](https://github.com/nf-core/ampliseq/pull/675) - Add the Zehr lab nifH database for DADA2 taxonomy assignment using `--dada_ref_taxonomy zehr-nifh` ### `Changed` From 02a25293d77cc501d01ed87de01013909037cea1 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Mon, 11 Dec 2023 19:39:53 +0100 Subject: [PATCH 070/104] Fix formatting mistake --- bin/taxref_reformat_zehr-nifh.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/taxref_reformat_zehr-nifh.sh b/bin/taxref_reformat_zehr-nifh.sh index 86a8eb26..54171f51 100755 --- a/bin/taxref_reformat_zehr-nifh.sh +++ b/bin/taxref_reformat_zehr-nifh.sh @@ -4,4 +4,4 @@ cp *.fasta assignTaxonomy.fna # Write the addSpecies() fasta file: addSpecies.fna -cut -d, -f 2,6,7 *.csv | grep -v '^sequence,' | sed 's/\(.*\),\(.*\),\(.*\)/>\3 \2\n\1/' > addSpecies.fna +cut -d, -f 2,6,7 *.csv | grep -v '^sequence,' | sed 's/\(.*\),[0-9]* \(.*\),\(.*\)/>\3 \2\n\1/' > addSpecies.fna From 
09ae4b7f9e0032043358cccc35c22e4b15845eee Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Tue, 12 Dec 2023 10:27:38 +0100 Subject: [PATCH 071/104] Change unknown to unversioned for PhytoRef --- conf/ref_databases.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/ref_databases.config b/conf/ref_databases.config index 6d0d7d3b..263f4390 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -81,7 +81,7 @@ params { file = [ "http://phytoref.sb-roscoff.fr/static/downloads/PhytoRef_with_taxonomy.fasta" ] citation = "Decelle, Johan, Sarah Romac, Rowena F. Stern, El Mahdi Bendif, Adriana Zingone, Stéphane Audic, Michael D. Guiry, et al. 2015. PhytoREF: A Reference Database of the Plastidial 16S rRNA Gene of Photosynthetic Eukaryotes with Curated Taxonomy. Molecular Ecology Resources 15 (6): 1435–45. https://doi.org/10.1111/1755-0998.12401." fmtscript = "taxref_reformat_phytoref.sh" - dbversion = "unknown" + dbversion = "unversioned" taxlevels = "Domain,Supergroup,Subphylum,Class,Subclass,Order,Suborder,Family,Genus,Species" } 'pr2' { From 9a646392966ff594af828c746d99ecffb2361a20 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 12 Dec 2023 10:46:36 +0100 Subject: [PATCH 072/104] fix phyloseq --- CHANGELOG.md | 1 + subworkflows/local/phyloseq_workflow.nf | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0879526..ee5150f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` - [#672](https://github.com/nf-core/ampliseq/pull/672) - Update output docs for collapsed abundance tables +- [#676](https://github.com/nf-core/ampliseq/pull/676) - Phyloseq sometimes only produced one of multiple output files ### `Dependencies` diff --git a/subworkflows/local/phyloseq_workflow.nf b/subworkflows/local/phyloseq_workflow.nf index adf208b7..214656e3 100644 ---
a/subworkflows/local/phyloseq_workflow.nf +++ b/subworkflows/local/phyloseq_workflow.nf @@ -36,7 +36,7 @@ workflow PHYLOSEQ_WORKFLOW { ch_phyloseq_inasv = ch_tsv } - PHYLOSEQ ( ch_tax, ch_phyloseq_inasv, ch_phyloseq_inmeta, ch_phyloseq_intree ) + PHYLOSEQ ( ch_tax, ch_phyloseq_inasv.first(), ch_phyloseq_inmeta, ch_phyloseq_intree ) emit: rds = PHYLOSEQ.out.rds From c3977cb13f8b8ce8f8605847aebbfec001b46290 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 12 Dec 2023 12:26:44 +0100 Subject: [PATCH 073/104] use .collect() to avoid warning and adjust tests --- subworkflows/local/phyloseq_workflow.nf | 2 +- tests/pipeline/pplace.nf.test | 3 ++- tests/pipeline/reftaxcustom.nf.test | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/phyloseq_workflow.nf b/subworkflows/local/phyloseq_workflow.nf index 214656e3..2401cf44 100644 --- a/subworkflows/local/phyloseq_workflow.nf +++ b/subworkflows/local/phyloseq_workflow.nf @@ -36,7 +36,7 @@ workflow PHYLOSEQ_WORKFLOW { ch_phyloseq_inasv = ch_tsv } - PHYLOSEQ ( ch_tax, ch_phyloseq_inasv.first(), ch_phyloseq_inmeta, ch_phyloseq_intree ) + PHYLOSEQ ( ch_tax, ch_phyloseq_inasv.collect(), ch_phyloseq_inmeta, ch_phyloseq_intree ) emit: rds = PHYLOSEQ.out.rds diff --git a/tests/pipeline/pplace.nf.test b/tests/pipeline/pplace.nf.test index 564cf2b9..781d3dcd 100644 --- a/tests/pipeline/pplace.nf.test +++ b/tests/pipeline/pplace.nf.test @@ -57,7 +57,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, { assert new File("$outputDir/summary_report/summary_report.html").exists() }, - { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() } + { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() }, + { assert new File("$outputDir/phyloseq/pplace_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/reftaxcustom.nf.test 
b/tests/pipeline/reftaxcustom.nf.test index abd2a38a..67c4d546 100644 --- a/tests/pipeline/reftaxcustom.nf.test +++ b/tests/pipeline/reftaxcustom.nf.test @@ -48,7 +48,8 @@ nextflow_pipeline { path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, { assert new File("$outputDir/summary_report/summary_report.html").exists() }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }, + { assert new File("$outputDir/phyloseq/kraken2_phyloseq.rds").exists() } ) } } From a32b58251d7bf7edce7103b1314b03eb029a10d2 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 12 Dec 2023 12:34:21 +0100 Subject: [PATCH 074/104] combine channels instead of using .collect() --- modules/local/phyloseq.nf | 3 +-- subworkflows/local/phyloseq_workflow.nf | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/local/phyloseq.nf b/modules/local/phyloseq.nf index bbc6218b..946c91fa 100644 --- a/modules/local/phyloseq.nf +++ b/modules/local/phyloseq.nf @@ -8,8 +8,7 @@ process PHYLOSEQ { 'biocontainers/bioconductor-phyloseq:1.44.0--r43hdfd78af_0' }" input: - tuple val(prefix), path(tax_tsv) - path otu_tsv + tuple val(prefix), path(tax_tsv), path(otu_tsv) path sam_tsv path tree diff --git a/subworkflows/local/phyloseq_workflow.nf b/subworkflows/local/phyloseq_workflow.nf index 2401cf44..3b6d9dd4 100644 --- a/subworkflows/local/phyloseq_workflow.nf +++ b/subworkflows/local/phyloseq_workflow.nf @@ -36,7 +36,7 @@ workflow PHYLOSEQ_WORKFLOW { ch_phyloseq_inasv = ch_tsv } - PHYLOSEQ ( ch_tax, ch_phyloseq_inasv.collect(), ch_phyloseq_inmeta, ch_phyloseq_intree ) + PHYLOSEQ ( ch_tax.combine(ch_phyloseq_inasv), ch_phyloseq_inmeta, ch_phyloseq_intree ) emit: rds = PHYLOSEQ.out.rds From 9550aecd328d61feaf0fb3efdbce52f1a6d9d74c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jeanette=20T=C3=A5ngrot?= Date: Tue, 12 Dec 
2023 15:06:44 +0100 Subject: [PATCH 075/104] Add cut_its to SBDI export --- bin/sbdiexportreannotate.R | 13 ++++++++----- modules/local/sbdiexportreannotate.nf | 3 ++- workflows/ampliseq.nf | 4 ++-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/bin/sbdiexportreannotate.R b/bin/sbdiexportreannotate.R index 19d5e3ae..68a2b392 100755 --- a/bin/sbdiexportreannotate.R +++ b/bin/sbdiexportreannotate.R @@ -18,7 +18,10 @@ dbversion <- args[1] taxfile <- args[2] taxmethod <- args[3] wfversion <- args[4] -predfile <- args[5] +cut_its <- args[5] +predfile <- args[6] + +cut_its = ifelse(cut_its == 'none', '', paste(' cut_its:', cut_its, sep='')) # Read taxonomy table taxonomy <- read.delim(taxfile, sep = '\t', stringsAsFactors = FALSE) @@ -108,10 +111,10 @@ taxtable <- taxonomy %>% date_identified = as.character(lubridate::today()), reference_db = dbversion, annotation_algorithm = case_when( - (taxmethod == 'sintax') ~ paste('Ampliseq',wfversion,'(https://nf-co.re/ampliseq) VSEARCH:sintax', sep=' '), - (!(is.na(otu) | otu == '')) ~ paste('Ampliseq',wfversion,'(https://nf-co.re/ampliseq) addsh', sep=' '), - (!(is.na(species_exact) | species_exact == '')) ~ paste('Ampliseq',wfversion,'(https://nf-co.re/ampliseq) DADA2:assignTaxonomy:addSpecies', sep=' '), - TRUE ~ paste('Ampliseq',wfversion,'(https://nf-co.re/ampliseq) DADA2:assignTaxonomy', sep=' ') + (taxmethod == 'sintax') ~ paste('Ampliseq ',wfversion,' (https://nf-co.re/ampliseq) VSEARCH:sintax',cut_its, sep=' '), + (!(is.na(otu) | otu == '')) ~ paste('Ampliseq ',wfversion,' (https://nf-co.re/ampliseq) addsh',cut_its, sep=' '), + (!(is.na(species_exact) | species_exact == '')) ~ paste('Ampliseq ',wfversion,' (https://nf-co.re/ampliseq) DADA2:assignTaxonomy:addSpecies',cut_its, sep=' '), + TRUE ~ paste('Ampliseq ',wfversion,' (https://nf-co.re/ampliseq) DADA2:assignTaxonomy',cut_its, sep='') ), identification_references = 'https://docs.biodiversitydata.se/analyse-data/molecular-tools/#taxonomy-annotation', 
taxon_remarks = ifelse(!(is.na(domain) | domain == ''), paste('Domain = \'',domain,'\'',sep=''),''), diff --git a/modules/local/sbdiexportreannotate.nf b/modules/local/sbdiexportreannotate.nf index f06fae36..8ebe870c 100644 --- a/modules/local/sbdiexportreannotate.nf +++ b/modules/local/sbdiexportreannotate.nf @@ -11,6 +11,7 @@ process SBDIEXPORTREANNOTATE { path taxonomytable val taxonomymethod val dbversion + val cut_its path predictions output: @@ -28,7 +29,7 @@ process SBDIEXPORTREANNOTATE { ampliseq_version="v$workflow.manifest.version" fi - sbdiexportreannotate.R \"$dbversion\" $taxonomytable $taxonomymethod \"\$ampliseq_version\" $predictions + sbdiexportreannotate.R \"$dbversion\" $taxonomytable $taxonomymethod \"\$ampliseq_version\" $cut_its $predictions cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 05ddfee7..76a07726 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -722,11 +722,11 @@ workflow AMPLISEQ { if ( params.sintax_ref_taxonomy ) { SBDIEXPORT ( ch_dada2_asv, ch_sintax_tax, ch_metadata ) db_version = params.sintax_ref_databases[params.sintax_ref_taxonomy]["dbversion"] - SBDIEXPORTREANNOTATE ( ch_sintax_tax, "sintax", db_version, ch_barrnapsummary.ifEmpty([]) ) + SBDIEXPORTREANNOTATE ( ch_sintax_tax, "sintax", db_version, params.cut_its, ch_barrnapsummary.ifEmpty([]) ) } else { SBDIEXPORT ( ch_dada2_asv, ch_dada2_tax, ch_metadata ) db_version = params.dada_ref_databases[params.dada_ref_taxonomy]["dbversion"] - SBDIEXPORTREANNOTATE ( ch_dada2_tax, "dada2", db_version, ch_barrnapsummary.ifEmpty([]) ) + SBDIEXPORTREANNOTATE ( ch_dada2_tax, "dada2", db_version, params.cut_its, ch_barrnapsummary.ifEmpty([]) ) } ch_versions = ch_versions.mix(SBDIEXPORT.out.versions.first()) } From cb517396405c2217968f6047e51040f0a8e65430 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jeanette=20T=C3=A5ngrot?= Date: Tue, 12 Dec 2023 15:32:49 +0100 Subject: [PATCH 076/104] Update CHANGELOG 
--- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0879526..9da4a49b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- [#](https://github.com/nf-core/ampliseq/pull/) - Added cut_its information to SBDI export + ### `Fixed` - [#672](https://github.com/nf-core/ampliseq/pull/672) - Update output docs for collapsed abundance tables From 667189cda1217b5e3fd9ad0b149ef306f0e52b59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jeanette=20T=C3=A5ngrot?= Date: Tue, 12 Dec 2023 15:38:43 +0100 Subject: [PATCH 077/104] Update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bdb1b62..205f686d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` -- [#](https://github.com/nf-core/ampliseq/pull/) - Added cut_its information to SBDI export +- [#677](https://github.com/nf-core/ampliseq/pull/677) - Added cut_its information to SBDI export ### `Fixed` From 6b71e4d2500e72ac3eda29d80f0654ed7e5fa481 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 19 Dec 2023 10:19:46 +0100 Subject: [PATCH 078/104] Fix reporting --- assets/report_template.Rmd | 12 +++++++++--- modules/local/summary_report.nf | 3 ++- workflows/ampliseq.nf | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 8c8fc21e..264a7149 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -980,9 +980,15 @@ cat("\n\nDADA2 taxonomy assignments can be found in folder [dada2](../dada2) in # Header cat("## QIIME2\n") -cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) - using the database: `", params$qiime2_ref_tax_title, "`.
- More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "") +# indicate reference taxonomy +if ( !isFALSE(params$qiime2_ref_tax_title) ) { + cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) + using the database: `", params$qiime2_ref_tax_title, "`. + More details about the reference taxonomy database can be found in the ['Methods section'](#methods).\n\n", sep = "") +} else { + cat("The taxonomic classification was performed by [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) using a custom database ", + "provided by the user.\n\n", sep = "") +} # Read file and prepare table asv_tax <- read.table(params$qiime2_taxonomy, header = TRUE, sep = "\t") diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index a8e082b0..1a288a0f 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -118,7 +118,8 @@ process SUMMARY_REPORT { kraken2_tax ? "kraken2_taxonomy='$kraken2_tax',kraken2_confidence='$params.kraken2_confidence'" : "", kraken2_tax && !params.kraken2_ref_tax_custom ? "kraken2_ref_tax_title='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["title"]}',kraken2_ref_tax_file='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["file"]}',kraken2_ref_tax_citation='${params.kraken2_ref_databases[params.kraken2_ref_taxonomy]["citation"]}'" : "", pplace_tax ? "pplace_taxonomy='$pplace_tax',pplace_heattree='$pplace_heattree'" : "", - qiime2_tax && params.qiime_ref_taxonomy ? "qiime2_taxonomy='$qiime2_tax',qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : qiime2_tax ? "qiime2_taxonomy='$qiime2_tax'" : "", + qiime2_tax ? 
"qiime2_taxonomy='$qiime2_tax'" : "", + qiime2_tax && params.qiime_ref_taxonomy ? "qiime2_ref_tax_title='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["title"]}',qiime2_ref_tax_file='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["file"]}',qiime2_ref_tax_citation='${params.qiime_ref_databases[params.qiime_ref_taxonomy]["citation"]}'" : "", run_qiime2 ? "val_used_taxonomy='$val_used_taxonomy'" : "", filter_stats_tsv ? "filter_stats_tsv='$filter_stats_tsv',qiime2_filtertaxa='$qiime2_filtertaxa',exclude_taxa='$params.exclude_taxa',min_frequency='$params.min_frequency',min_samples='$params.min_samples'" : "", barplot ? "barplot=TRUE" : "", diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 9e85bf6a..6dcc370f 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -863,7 +863,7 @@ workflow AMPLISEQ { !params.skip_taxonomy && ( params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom ) ? KRAKEN2_TAXONOMY_WF.out.tax_tsv.ifEmpty( [] ) : [], !params.skip_taxonomy && params.pplace_tree ? ch_pplace_tax.ifEmpty( [] ) : [], !params.skip_taxonomy && params.pplace_tree ? FASTA_NEWICK_EPANG_GAPPA.out.heattree.ifEmpty( [[],[]] ) : [[],[]], - !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) && run_qiime2 ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [], + !params.skip_taxonomy && ( params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.classifier ) && run_qiime2_taxonomy ? QIIME2_TAXONOMY.out.tsv.ifEmpty( [] ) : [], run_qiime2, run_qiime2 ? val_used_taxonomy : "", run_qiime2 && ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) ? 
ch_dada2_asv.countLines()+","+QIIME2_FILTERTAXA.out.tsv.countLines() : "", From 67da9335ceb325b15939ad34d02e27cf4b599a9a Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 19 Dec 2023 11:00:07 +0100 Subject: [PATCH 079/104] Prevent masking low complexity regions by VSEARCH --- CHANGELOG.md | 1 + conf/modules.config | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ecf13f13..77c3a6bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#672](https://github.com/nf-core/ampliseq/pull/672) - Update output docs for collapsed abundance tables - [#673](https://github.com/nf-core/ampliseq/pull/673) - Fix logic relating to generation of qiime2 taxonomy part of summary report - [#676](https://github.com/nf-core/ampliseq/pull/676) - Phyloseq sometimes only produced one of multiple output files +- [#679](https://github.com/nf-core/ampliseq/pull/679) - Prevent masking low complexity regions by VSEARCH with lower case letters ### `Dependencies` diff --git a/conf/modules.config b/conf/modules.config index 68794ab7..69b7967c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -451,7 +451,7 @@ process { } withName: VSEARCH_CLUSTER { - ext.args = "--id ${params.vsearch_cluster_id} --usersort" + ext.args = '--id ${params.vsearch_cluster_id} --usersort --qmask "none"' ext.args2 = '--cluster_smallmem' ext.args3 = '--clusters' } From a1992a484a883b7bcc2531b112c2cc0f45743a3a Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 19 Dec 2023 11:08:55 +0100 Subject: [PATCH 080/104] fix error message for --input_folder --- subworkflows/local/parse_input.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/parse_input.nf b/subworkflows/local/parse_input.nf index ba8aa484..ae134ae9 100644 --- a/subworkflows/local/parse_input.nf +++ b/subworkflows/local/parse_input.nf @@ -11,7 +11,7 @@ workflow PARSE_INPUT { 
//Check folders in folder when multiple_sequencing_runs folders = multiple_sequencing_runs ? "/*" : "" error_message = "\nCannot find any reads matching: \"${input}${folders}${extension}\"\n" - error_message += "Please revise the input folder (\"--input\"): \"${input}\"\n" + error_message += "Please revise the input folder (\"--input_folder\"): \"${input}\"\n" error_message += "and the input file pattern (\"--extension\"): \"${extension}\"\n" error_message += "*Please note: Path needs to be enclosed in quotes!*\n" error_message += multiple_sequencing_runs ? "If you do not have multiple sequencing runs, please do not use \"--multiple_sequencing_runs\"!\n" : "If you have multiple sequencing runs, please add \"--multiple_sequencing_runs\"!\n" From 55116e04d53aae932153c2f67416211b25464ae1 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 19 Dec 2023 13:08:17 +0100 Subject: [PATCH 081/104] Fix hyphens --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 69b7967c..cbc04cd0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -451,7 +451,7 @@ process { } withName: VSEARCH_CLUSTER { - ext.args = '--id ${params.vsearch_cluster_id} --usersort --qmask "none"' + ext.args = "--id ${params.vsearch_cluster_id} --usersort --qmask 'none'" ext.args2 = '--cluster_smallmem' ext.args3 = '--clusters' } From 51ee751e0bdacf590aab29fdaf15ad4220329e06 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 19 Dec 2023 13:30:47 +0100 Subject: [PATCH 082/104] only report input folder when it is created --- assets/report_template.Rmd | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 264a7149..cf1fc548 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -181,17 +181,13 @@ supporting denoising of any amplicon and supports a variety of taxonomic databas ```{r, results='asis'} if ( 
!isFALSE(params$metadata) ) { - cat(paste0(" -# Data input and Metadata - -Pipeline input was saved to the [input](../input) directory. - ")) + cat("# Data input and Metadata\n\n") } else { - cat(paste0(" -# Data input + cat("# Data input\n\n") +} -Pipeline input was saved in folder [input](../input). - ")) +if ( !isFALSE(params$metadata) || !isFALSE(params$input_samplesheet) ) { + cat("Pipeline input was saved in folder [input](../input).\n\n") } if ( !isFALSE(params$input_samplesheet) ) { From f572fafeceb7949351bd27c90bea468d2b9a9f01 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 19 Dec 2023 13:42:50 +0100 Subject: [PATCH 083/104] report correct cutadapt percentages for all possible number ranges --- assets/report_template.Rmd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index cf1fc548..752a6b17 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -258,8 +258,7 @@ the denoising tool or sequences might be lost due to being labelled as PCR chime # import tsv cutadapt_summary <- read.table(file = params$cutadapt_summary, header = TRUE, sep = "\t") -cutadapt_passed_col <- as.numeric(substr( - cutadapt_summary$cutadapt_passing_filters_percent, 1, 4)) +cutadapt_passed_col <- as.numeric( gsub("%","",cutadapt_summary$cutadapt_passing_filters_percent) ) cutadapt_max_discarded <- round( 100 - min(cutadapt_passed_col), 1 ) cutadapt_avg_passed <- round(mean(cutadapt_passed_col),1) From 13a6e5ec988505faf21306bd2b33646a5787b43c Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 19 Dec 2023 13:48:53 +0100 Subject: [PATCH 084/104] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ecf13f13..01ccf88b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#672](https://github.com/nf-core/ampliseq/pull/672) - Update output 
docs for collapsed abundance tables - [#673](https://github.com/nf-core/ampliseq/pull/673) - Fix logic relating to generation of qiime2 taxonomy part of summary report - [#676](https://github.com/nf-core/ampliseq/pull/676) - Phyloseq sometimes only produced one of multiple output files +- [#680](https://github.com/nf-core/ampliseq/pull/680) - Improved pipeline summary report & error messages ### `Dependencies` From d0786b7a057e4478b67ffcba60d7dda3f038a9b1 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 19 Dec 2023 14:54:40 +0100 Subject: [PATCH 085/104] add --dada_addspecies_allowmultiple and --dada_taxonomy_rc --- CHANGELOG.md | 1 + conf/modules.config | 9 ++++----- docs/usage.md | 8 ++++---- nextflow.config | 4 +++- nextflow_schema.json | 10 ++++++++++ 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77c3a6bc..22dd3c52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#667](https://github.com/nf-core/ampliseq/pull/667) - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification - [#674](https://github.com/nf-core/ampliseq/pull/674) - Add PhytoRef database for DADA2 taxonomy assignment using `--dada_ref_taxonomy phytoref` - [#675](https://github.com/nf-core/ampliseq/pull/675) - Add the Zehr lab nifH database for DADA2 taxonomy assignment using `--dada_ref_taxonomy zehr-nifh` +- [#681](https://github.com/nf-core/ampliseq/pull/681) - For DADA2, with `--dada_addspecies_allowmultiple` multiple exact species matches are reported and with `--dada_taxonomy_rc` reverse-complement matches are also considered in taxonomic classification ### `Changed` diff --git a/conf/modules.config b/conf/modules.config index cbc04cd0..e02e9342 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -355,8 +355,7 @@ process { ext.seed = "${params.seed}" ext.args = [ 'minBoot = 50', - 
params.pacbio ? "tryRC = TRUE" : - params.iontorrent ? "tryRC = TRUE" : "" + params.dada_taxonomy_rc || params.pacbio || params.iontorrent ? "tryRC = TRUE" : "tryRC = FALSE" ].join(',').replaceAll('(,)*$', "") publishDir = [ [ @@ -375,9 +374,9 @@ process { withName: DADA2_ADDSPECIES { ext.seed = "${params.seed}" ext.args = [ - 'allowMultiple = FALSE, n = 1e5', - params.pacbio ? "tryRC = TRUE" : - params.iontorrent ? "tryRC = TRUE" : "" + 'n = 1e5', + params.dada_addspecies_allowmultiple ? "allowMultiple = TRUE" : "", + params.dada_taxonomy_rc || params.pacbio || params.iontorrent ? "tryRC = TRUE" : "tryRC = FALSE" ].join(',').replaceAll('(,)*$', "") publishDir = [ [ diff --git a/docs/usage.md b/docs/usage.md index 38c2cc23..f8625b2f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -221,18 +221,18 @@ Pre-configured reference taxonomy databases are: | Database key | DADA2 | SINTAX | Kraken2 | QIIME2 | Target genes | | ------------ | ----- | ------ | ------- | ------ | --------------------------------------------- | | silva | + | - | + | + | 16S rRNA | -| gtdb | + | - | - | - | 16S rRNA | +| gtdb | +¹ | - | - | - | 16S rRNA | | sbdi-gtdb | + | - | - | - | 16S rRNA | | rdp | + | - | + | - | 16S rRNA | -| greengenes | - | - | + | (+)¹ | 16S rRNA | +| greengenes | - | - | + | (+)² | 16S rRNA | | pr2 | + | - | - | - | 18S rRNA | | unite-fungi | + | + | - | + | eukaryotic nuclear ribosomal ITS region | | unite-alleuk | + | + | - | + | eukaryotic nuclear ribosomal ITS region | | coidb | + | + | - | - | eukaryotic Cytochrome Oxidase I (COI) | | midori2-co1 | + | - | - | - | eukaryotic Cytochrome Oxidase I (COI) | -| standard | - | - | + | - | any in genomes of archaea, bacteria, viruses² | +| standard | - | - | + | - | any in genomes of archaea, bacteria, viruses³ | -¹: de-replicated at 85%, only for testing purposes; ²: quality of results might vary +¹[`--dada_taxonomy_rc`](https://nf-co.re/ampliseq/parameters#dada_taxonomy_rc) is recommended; ²: de-replicated at 85%, only 
for testing purposes; ³: quality of results might vary Special features of taxonomic classification tools: diff --git a/nextflow.config b/nextflow.config index 831a43a2..7f35f9e4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -107,13 +107,15 @@ params { dada_ref_tax_custom = null dada_ref_tax_custom_sp = null cut_dada_ref_taxonomy = false + dada_addspecies_allowmultiple = false + dada_taxonomy_rc = false sintax_ref_taxonomy = null qiime_ref_taxonomy = null qiime_ref_tax_custom = null kraken2_ref_taxonomy = null kraken2_assign_taxlevels = null kraken2_ref_tax_custom = null - kraken2_confidence = 0 + kraken2_confidence = 0.0 // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index b8afed35..37d528fd 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -390,6 +390,16 @@ "help_text": "Expected amplified sequences are extracted from the DADA2 reference taxonomy using the primer sequences, that might improve classification. This is not applied to species classification (assignSpecies) but only for lower taxonomic levels (assignTaxonomy).", "description": "If the expected amplified sequences are extracted from the DADA2 reference taxonomy database" }, + "dada_addspecies_allowmultiple": { + "type": "boolean", + "help_text": "Defines the behavior when multiple exact matches against different species are returned. By default only unambiguous identifications are returned. 
If TRUE, a concatenated string of all exactly matched species is returned.", + "description": "If multiple exact matches against different species are returned" + }, + "dada_taxonomy_rc": { + "type": "boolean", + "help_text": "Reverse-complement of each sequence will be used for classification if it is a better match to the reference sequences than the forward sequence.", + "description": "If reverse-complement of each sequence will also be tested for classification" + }, "pplace_tree": { "type": "string", "description": "Newick file with reference phylogenetic tree. Requires also `--pplace_aln` and `--pplace_model`." From 2edaa5a08de446166a1d7f1e367029a6fc4c1b26 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Tue, 19 Dec 2023 18:30:49 +0000 Subject: [PATCH 086/104] Template update for nf-core/tools version 2.11 --- .github/CONTRIBUTING.md | 3 + .github/PULL_REQUEST_TEMPLATE.md | 1 + .github/workflows/ci.yml | 2 +- .github/workflows/fix-linting.yml | 4 +- .github/workflows/linting.yml | 12 +- .gitpod.yml | 4 +- CHANGELOG.md | 2 +- README.md | 17 +-- assets/multiqc_config.yml | 2 +- assets/slackreport.json | 2 +- conf/modules.config | 2 +- docs/usage.md | 4 +- lib/NfcoreTemplate.groovy | 32 ++--- modules.json | 6 +- .../dumpsoftwareversions/environment.yml | 7 ++ .../custom/dumpsoftwareversions/main.nf | 6 +- .../custom/dumpsoftwareversions/meta.yml | 7 +- .../dumpsoftwareversions/tests/main.nf.test | 38 ++++++ .../tests/main.nf.test.snap | 27 +++++ .../dumpsoftwareversions/tests/tags.yml | 2 + modules/nf-core/fastqc/environment.yml | 7 ++ modules/nf-core/fastqc/main.nf | 10 +- modules/nf-core/fastqc/meta.yml | 5 + modules/nf-core/fastqc/tests/main.nf.test | 109 ++++++++++++++++++ .../nf-core/fastqc/tests/main.nf.test.snap | 10 ++ modules/nf-core/fastqc/tests/tags.yml | 2 + modules/nf-core/multiqc/environment.yml | 7 ++ modules/nf-core/multiqc/main.nf | 8 +- modules/nf-core/multiqc/meta.yml | 11 +- modules/nf-core/multiqc/tests/main.nf.test | 63 ++++++++++
modules/nf-core/multiqc/tests/tags.yml | 2 + nextflow.config | 12 +- 32 files changed, 360 insertions(+), 66 deletions(-) create mode 100644 modules/nf-core/custom/dumpsoftwareversions/environment.yml create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap create mode 100644 modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml create mode 100644 modules/nf-core/fastqc/environment.yml create mode 100644 modules/nf-core/fastqc/tests/main.nf.test create mode 100644 modules/nf-core/fastqc/tests/main.nf.test.snap create mode 100644 modules/nf-core/fastqc/tests/tags.yml create mode 100644 modules/nf-core/multiqc/environment.yml create mode 100644 modules/nf-core/multiqc/tests/main.nf.test create mode 100644 modules/nf-core/multiqc/tests/tags.yml diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index c1642f76..e5e7a3d5 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -27,6 +27,9 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to +receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir `. + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. 
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 636e0714..ada206f7 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,6 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/ampl - [ ] If necessary, also make a PR on the nf-core/ampliseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 53759d75..435741ca 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,7 @@ jobs: - "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index ec23bef2..9781ad7c 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: token: ${{ secrets.nf_core_bot_auth_token }} @@ -24,7 +24,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @prettier/plugin-php diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index b8bdd214..905c58e4 100644 --- a/.github/workflows/linting.yml +++
b/.github/workflows/linting.yml @@ -14,9 +14,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -27,9 +27,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 - name: Install Prettier run: npm install -g prettier @@ -40,7 +40,7 @@ jobs: PythonBlack: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Check code lints with Black uses: psf/black@stable @@ -71,7 +71,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 diff --git a/.gitpod.yml b/.gitpod.yml index 25488dcc..acf72695 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -4,7 +4,9 @@ tasks: command: | pre-commit install --install-hooks nextflow self-update - + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - codezombiech.gitignore # Language support for .gitignore files diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a8be928..85cef0e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.7.0dev - [date] +## v2.8.0dev - [date] Initial release of nf-core/ampliseq, created with the [nf-core](https://nf-co.re/) template. 
diff --git a/README.md b/README.md index ccec01ab..d60f868d 100644 --- a/README.md +++ b/README.md @@ -30,11 +30,8 @@ ## Usage -:::note -If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how -to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) -with `-profile test` before running the workflow on actual data. -::: +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - + diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 64df13db..eaf9f190 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,5 +1,5 @@ report_comment: > - This report has been generated by the nf-core/ampliseq + This report has been generated by the nf-core/ampliseq analysis pipeline. For information about how to interpret these results, please see the documentation. 
report_section_order: diff --git a/assets/slackreport.json b/assets/slackreport.json index b170caab..6eab3738 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "nf-core/ampliseq v${version} - ${runName}", + "author_name": "nf-core/ampliseq ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/conf/modules.config b/conf/modules.config index 39e81386..d91c6aba 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -39,7 +39,7 @@ process { } withName: 'MULTIQC' { - ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, diff --git a/docs/usage.md b/docs/usage.md index c129feba..569ebf71 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -20,7 +20,7 @@ You will need to create a samplesheet with information about the samples you wou The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. 
Below is an example for the same sample sequenced across 3 lanes: -```console +```csv title="samplesheet.csv" sample,fastq_1,fastq_2 CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz @@ -33,7 +33,7 @@ The pipeline will auto-detect whether a sample is single- or paired-end using th A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. -```console +```csv title="samplesheet.csv" sample,fastq_1,fastq_2 CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 01b8653d..e248e4c3 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -4,6 +4,7 @@ import org.yaml.snakeyaml.Yaml import groovy.json.JsonOutput +import nextflow.extension.FilesEx class NfcoreTemplate { @@ -141,12 +142,14 @@ class NfcoreTemplate { try { if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } [ 'sendmail', '-t' ].execute() << sendmail_html log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" } catch (all) { // Catch failures and try with plaintext def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { mail_cmd += [ '-A', mqc_report ] } mail_cmd.execute() << email_html @@ -155,14 +158,16 @@ class NfcoreTemplate { } // Write 
summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") + FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() } // @@ -227,15 +232,14 @@ class NfcoreTemplate { // Dump pipeline parameters in a json file // public static void dump_parameters(workflow, params) { - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def output_pf = new File(output_d, "params_${timestamp}.json") + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") def jsonStr = JsonOutput.toJson(params) - output_pf.text = JsonOutput.prettyPrint(jsonStr) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() } // diff --git a/modules.json b/modules.json index dca11289..d32b2957 100644 --- a/modules.json +++ b/modules.json @@ -7,17 +7,17 @@ "nf-core": { "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "bd8092b67b5103bdd52e300f75889442275c3117", + 
"git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "4ab13872435962dadc239979554d13709e20bf29", "installed_by": ["modules"] } } diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 00000000..f0c63f69 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,7 @@ +name: custom_dumpsoftwareversions +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.17 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index ebc87273..7685b33c 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : + 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index c32657de..5f15a5fd 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: @@ -16,7 +16,6 @@ input: type: file description: YML file containing software versions pattern: "*.yml" - output: - yml: type: file @@ -30,7 +29,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 00000000..eec1db10 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + 
input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 00000000..4274ed57 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,27 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" + ], + "1": [ + "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" + ], + "2": [ + "versions.yml:md5,3843ac526e762117eedf8825b40683df" + ], + "mqc_yml": [ + "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" + ], + "versions": [ + "versions.yml:md5,3843ac526e762117eedf8825b40683df" + ], + "yml": [ + "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" + ] + } + ], + "timestamp": "2023-11-03T14:43:22.157011" + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 00000000..405aa24a --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 00000000..1787b38a --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,7 @@ +name: fastqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 249f9064..9e19a74c 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,10 +2,10 @@ process FASTQC { tag "$meta.id" label 
'process_medium' - conda "bioconda::fastqc=0.11.9" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'biocontainers/fastqc:0.11.9--0' }" + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: tuple val(meta), path(reads) @@ -37,7 +37,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -49,7 +49,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 4da5bb5a..ee5507e0 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -50,3 +50,8 @@ authors: - "@grst" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test new file mode 100644 index 00000000..b9e8f926 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -0,0 +1,109 @@ +nextflow_process { + + name "Test Process FASTQC" + script "../main.nf" + process "FASTQC" + tag "modules" + tag "modules_nfcore" + tag "fastqc" + + test("Single-Read") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + // NOTE The report contains the date inside it, which means that the 
md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
Mon 2 Oct 2023
test.gz
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, + { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match("versions") }, + { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + ) + } + } +// TODO +// // +// // Test with paired-end data +// // +// workflow test_fastqc_paired_end { +// input = [ +// [id: 'test', single_end: false], // meta map +// [ +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) +// ] +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with interleaved data +// // +// workflow test_fastqc_interleaved { +// input = [ +// [id: 'test', single_end: false], // meta map +// file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with bam data +// // +// workflow test_fastqc_bam { +// input = [ +// [id: 'test', single_end: false], // meta map +// file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with multiple samples +// // +// workflow test_fastqc_multiple { +// input = [ +// [id: 'test', single_end: false], // meta map +// [ +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), +// file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) +// ] +// ] + +// FASTQC ( input ) +// } + +// // +// // Test with custom prefix +// // +// workflow test_fastqc_custom_prefix { +// 
input = [ +// [ id:'mysample', single_end:true ], // meta map +// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) +// ] + +// FASTQC ( input ) +// } +} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 00000000..636a32ce --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "timestamp": "2023-10-09T23:40:54+0000" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 00000000..7834294b --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 00000000..bc0bdb5b --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.18 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 1fc387be..00cc48d2 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.18--pyhdfd78af_0' : + 'biocontainers/multiqc:1.18--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? 
"--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f93b5ee5..f1aa660e 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,5 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC @@ -13,7 +13,6 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - input: - multiqc_files: type: file @@ -31,7 +30,6 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" - output: - report: type: file @@ -54,3 +52,8 @@ authors: - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 00000000..c2dad217 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,63 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("MULTIQC: FASTQC") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + 
assertAll( + { assert process.success }, + { assert path(process.out.report.get(0)).exists() }, + { assert path(process.out.data.get(0)).exists() }, + { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + ) + } + + } + + test("MULTIQC: FASTQC and a config file") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.report.get(0)).exists() }, + { assert path(process.out.data.get(0)).exists() }, + { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 00000000..bea6c0d3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/nextflow.config b/nextflow.config index a982e809..027837bf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,7 +14,7 @@ params { input = null // References genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false @@ -82,6 +82,7 @@ profiles { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { conda.enabled = true @@ -104,13 +105,13 @@ profiles { } docker { docker.enabled = true - docker.userEmulation = true conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false + runOptions = '-u $(id -u):$(id -g)' } arm { docker.runOptions = 
'-u $(id -u):$(id -g) --platform=linux/amd64' @@ -181,7 +182,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Load igenomes.config if required @@ -204,6 +205,9 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. +nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true @@ -229,7 +233,7 @@ manifest { description = """Amplicon sequencing analysis workflow using DADA2 and QIIME2""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.7.0dev' + version = '2.8.0dev' doi = '' } From b8648b2f422766f999f8d62cfdfd9f2059345803 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 20 Dec 2023 09:09:08 +0100 Subject: [PATCH 087/104] update README.md to github style syntax --- README.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 78c4f193..2e04e6cc 100644 --- a/README.md +++ b/README.md @@ -65,13 +65,11 @@ nextflow run nf-core/ampliseq \ --outdir ``` -:::note -Adding metadata will considerably increase the output, see [metadata documentation](https://nf-co.re/ampliseq/usage#metadata). -::: +> [!NOTE] +> Adding metadata will considerably increase the output, see [metadata documentation](https://nf-co.re/ampliseq/usage#metadata). -:::note -By default the taxonomic assignment will be performed with DADA2 on SILVA database, but there are various tools and databases readily available, see [taxonomic classification documentation](https://nf-co.re/ampliseq/usage#taxonomic-classification). 
-::: +> [!TIP] +> By default the taxonomic assignment will be performed with DADA2 on SILVA database, but there are various tools and databases readily available, see [taxonomic classification documentation](https://nf-co.re/ampliseq/usage#taxonomic-classification). > [!WARNING] > Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; From 9ebe892d0f20deb666eeb59b30b725ee32e74a0c Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 20 Dec 2023 09:16:57 +0100 Subject: [PATCH 088/104] update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d4b19e2..8a0107dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#676](https://github.com/nf-core/ampliseq/pull/676) - Phyloseq sometimes only produced one of multiple output files - [#679](https://github.com/nf-core/ampliseq/pull/679) - Prevent masking low complexity regions by VSEARCH with lower case letters - [#680](https://github.com/nf-core/ampliseq/pull/680) - Improved pipeline summary report & error messages +- [#683](https://github.com/nf-core/ampliseq/pull/683) - Template update for nf-core/tools version 2.11 ### `Dependencies` From cbe6bf9a7a4af62cdb64d81d7b343583f726b3ed Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 20 Dec 2023 10:38:23 +0100 Subject: [PATCH 089/104] adjust some assertions in .nf.test.snap --- tests/pipeline/doubleprimers.nf.test.snap | 6 +++--- tests/pipeline/fasta.nf.test.snap | 2 +- tests/pipeline/iontorrent.nf.test.snap | 2 +- tests/pipeline/multi.nf.test.snap | 2 +- tests/pipeline/novaseq.nf.test.snap | 2 +- tests/pipeline/pacbio_its.nf.test.snap | 2 +- tests/pipeline/pplace.nf.test.snap | 4 ++-- tests/pipeline/qiimecustom.nf.test.snap | 6 +++--- 
tests/pipeline/reftaxcustom.nf.test.snap | 6 +++--- tests/pipeline/single.nf.test.snap | 2 +- tests/pipeline/sintax.nf.test.snap | 2 +- tests/pipeline/test.nf.test.snap | 6 +++--- 12 files changed, 21 insertions(+), 21 deletions(-) diff --git a/tests/pipeline/doubleprimers.nf.test.snap b/tests/pipeline/doubleprimers.nf.test.snap index b5e9cb2b..bfa9d45a 100644 --- a/tests/pipeline/doubleprimers.nf.test.snap +++ b/tests/pipeline/doubleprimers.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-07-27T13:49:03+0000" }, @@ -52,8 +52,8 @@ }, "multiqc": { "content": [ - "multiqc_general_stats.txt:md5,8429be0a16adf09b6634bf31b430bfac", - "multiqc_cutadapt.txt:md5,e89359b4478ef5d10620709f651f26a2" + "multiqc_general_stats.txt:md5,bb1d98b03d4cd5091acfbef93cb38fc4", + "multiqc_cutadapt.txt:md5,0ef33b6eb4d202c34fcfa51a0dffadda" ], "timestamp": "2023-05-28T21:08:54+0000" } diff --git a/tests/pipeline/fasta.nf.test.snap b/tests/pipeline/fasta.nf.test.snap index 6350712f..db862e0f 
100644 --- a/tests/pipeline/fasta.nf.test.snap +++ b/tests/pipeline/fasta.nf.test.snap @@ -7,7 +7,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:06:17+0000" }, diff --git a/tests/pipeline/iontorrent.nf.test.snap b/tests/pipeline/iontorrent.nf.test.snap index 420b3dd6..61d7a616 100644 --- a/tests/pipeline/iontorrent.nf.test.snap +++ b/tests/pipeline/iontorrent.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T01:42:35+0000" }, diff --git a/tests/pipeline/multi.nf.test.snap b/tests/pipeline/multi.nf.test.snap index daba2601..730ba6a4 100644 --- 
a/tests/pipeline/multi.nf.test.snap +++ b/tests/pipeline/multi.nf.test.snap @@ -14,7 +14,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:15:03+0000" }, diff --git a/tests/pipeline/novaseq.nf.test.snap b/tests/pipeline/novaseq.nf.test.snap index 427cd40b..0b4abc88 100644 --- a/tests/pipeline/novaseq.nf.test.snap +++ b/tests/pipeline/novaseq.nf.test.snap @@ -7,7 +7,7 @@ }, "software_versions": { "content": [ - "{CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T00:10:02+0000" 
}, diff --git a/tests/pipeline/pacbio_its.nf.test.snap b/tests/pipeline/pacbio_its.nf.test.snap index c211e2b6..c6975e2c 100644 --- a/tests/pipeline/pacbio_its.nf.test.snap +++ b/tests/pipeline/pacbio_its.nf.test.snap @@ -35,7 +35,7 @@ }, "software_versions": { "content": [ - "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T02:07:02+0000" }, diff --git a/tests/pipeline/pplace.nf.test.snap b/tests/pipeline/pplace.nf.test.snap index 4f64efa8..724b70f3 100644 --- a/tests/pipeline/pplace.nf.test.snap +++ b/tests/pipeline/pplace.nf.test.snap @@ -8,7 +8,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, 
DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T17:24:03+0000" }, @@ -53,7 +53,7 @@ "multiqc": { "content": [ "multiqc_general_stats.txt:md5,9e8ff06d7285ab8748a80e639d3dd54a", - "multiqc_cutadapt.txt:md5,330a7b72dc671ca99fcb3fb84b6776c1" + "multiqc_cutadapt.txt:md5,c640ad4fa88bc31b09fa353e606013a2" ], "timestamp": "2023-06-20T17:24:03+0000" } diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap index 594688a9..e7628647 
100644 --- a/tests/pipeline/qiimecustom.nf.test.snap +++ b/tests/pipeline/qiimecustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, @@ -35,8 +35,8 @@ "multiqc": { "content": [ "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", - "multiqc_general_stats.txt:md5,88c2b9e6d02b83afe4f9551e6c9a91a7", - "multiqc_cutadapt.txt:md5,330a7b72dc671ca99fcb3fb84b6776c1" + "multiqc_general_stats.txt:md5,c6456e36c17e592f12f9a2f9069f24f8", + "multiqc_cutadapt.txt:md5,c640ad4fa88bc31b09fa353e606013a2" ], "timestamp": "2023-05-28T21:18:54+0000" } diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index b5aa10f1..2e591854 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, 
dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, @@ -53,8 +53,8 @@ "multiqc": { "content": [ "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", - "multiqc_general_stats.txt:md5,88c2b9e6d02b83afe4f9551e6c9a91a7", - "multiqc_cutadapt.txt:md5,330a7b72dc671ca99fcb3fb84b6776c1" + "multiqc_general_stats.txt:md5,c6456e36c17e592f12f9a2f9069f24f8", + "multiqc_cutadapt.txt:md5,c640ad4fa88bc31b09fa353e606013a2" ], "timestamp": "2023-05-28T21:18:54+0000" } diff --git a/tests/pipeline/single.nf.test.snap b/tests/pipeline/single.nf.test.snap index a31b986c..7e0bf619 100644 --- a/tests/pipeline/single.nf.test.snap +++ b/tests/pipeline/single.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, 
Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T20:35:33+0000" }, diff --git a/tests/pipeline/sintax.nf.test.snap b/tests/pipeline/sintax.nf.test.snap index b19bf8fe..f2d01ce0 100644 --- a/tests/pipeline/sintax.nf.test.snap +++ b/tests/pipeline/sintax.nf.test.snap @@ -16,7 +16,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-06-20T16:40:18+0000" }, diff --git a/tests/pipeline/test.nf.test.snap b/tests/pipeline/test.nf.test.snap index 967f1369..2d29c03f 100644 --- a/tests/pipeline/test.nf.test.snap +++ 
b/tests/pipeline/test.nf.test.snap @@ -22,7 +22,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.4, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CLUSTERS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, VSEARCH_CLUSTER={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CLUSTERS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, VSEARCH_CLUSTER={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" ], "timestamp": "2023-05-28T20:55:32+0000" }, @@ -58,8 +58,8 @@ "multiqc": { "content": [ "multiqc_fastqc.txt:md5,147764e40079c3abf97a17cfe2275c52", - "multiqc_general_stats.txt:md5,88c2b9e6d02b83afe4f9551e6c9a91a7", - "multiqc_cutadapt.txt:md5,330a7b72dc671ca99fcb3fb84b6776c1" + "multiqc_general_stats.txt:md5,c6456e36c17e592f12f9a2f9069f24f8", + "multiqc_cutadapt.txt:md5,c640ad4fa88bc31b09fa353e606013a2" ], "timestamp": "2023-05-28T20:55:32+0000" }, From 1e14c0ec8a34ff93ad8f1babb9227cb5828ca506 Mon Sep 17 00:00:00 2001 From: 
daniel Date: Wed, 20 Dec 2023 14:22:12 +0100 Subject: [PATCH 090/104] update docker runOptions --- nextflow.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index 21483d47..cde6082a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -210,10 +210,10 @@ profiles { shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false - runOptions = '-u $(id -u):$(id -g)' + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true From 9742624b19082a54fd481119406785af6bc802d6 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 20 Dec 2023 14:44:48 +0100 Subject: [PATCH 091/104] adjust pplace.nf.test.snap --- tests/pipeline/pplace.nf.test.snap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipeline/pplace.nf.test.snap b/tests/pipeline/pplace.nf.test.snap index 724b70f3..3e23ab63 100644 --- a/tests/pipeline/pplace.nf.test.snap +++ b/tests/pipeline/pplace.nf.test.snap @@ -52,7 +52,7 @@ }, "multiqc": { "content": [ - "multiqc_general_stats.txt:md5,9e8ff06d7285ab8748a80e639d3dd54a", + "multiqc_general_stats.txt:md5,cbe0b448f630111ee18976891354701a", "multiqc_cutadapt.txt:md5,c640ad4fa88bc31b09fa353e606013a2" ], "timestamp": "2023-06-20T17:24:03+0000" From 127b88a10216204010fb514aeb6bd1682cdeb96c Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Wed, 20 Dec 2023 15:54:53 +0000 Subject: [PATCH 092/104] Template update for nf-core/tools version 2.11.1 --- .../{release-announcments.yml => release-announcements.yml} | 0 nextflow.config | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename .github/workflows/{release-announcments.yml => release-announcements.yml} (100%) diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcements.yml similarity index 100% rename from 
.github/workflows/release-announcments.yml rename to .github/workflows/release-announcements.yml diff --git a/nextflow.config b/nextflow.config index 027837bf..05555e3b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -111,10 +111,10 @@ profiles { shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false - runOptions = '-u $(id -u):$(id -g)' + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true From 815e62b2af143a1a8545e900a86cae6ee4e3b4a6 Mon Sep 17 00:00:00 2001 From: Daniel Straub <42973691+d4straub@users.noreply.github.com> Date: Thu, 21 Dec 2023 14:05:30 +0100 Subject: [PATCH 093/104] Apply suggestions from code review --- bin/taxref_reformat_qiime_greengenes2022.sh | 8 ++------ conf/ref_databases.config | 8 +++++++- nextflow_schema.json | 3 ++- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/bin/taxref_reformat_qiime_greengenes2022.sh b/bin/taxref_reformat_qiime_greengenes2022.sh index aa4678a8..69c75fae 100755 --- a/bin/taxref_reformat_qiime_greengenes2022.sh +++ b/bin/taxref_reformat_qiime_greengenes2022.sh @@ -1,9 +1,5 @@ #!/bin/sh # Decompress files. -gzip -c -d 2022.10.seqs.fna.gz > 2022.10.seqs.fna -gzip -c -d 2022.10.taxonomy.md5.tsv.gz > 2022.10.taxonomy.md5.tsv - -# Select and rename files -mv *.fna greengenes2022.fna -mv *.tsv greengenes2022.tax +gzip -c -d *.seqs.fna.gz > greengenes2.fna +gzip -c -d *.taxonomy.md5.tsv.gz > greengenes2.tax diff --git a/conf/ref_databases.config b/conf/ref_databases.config index 157e835e..e89df338 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -330,7 +330,13 @@ params { citation = "McDonald, D., Price, M., Goodrich, J. et al. An improved Greengenes taxonomy with explicit ranks for ecological and evolutionary analyses of bacteria and archaea. ISME J 6, 610–618 (2012). 
https://doi.org/10.1038/ismej.2011.139" fmtscript = "taxref_reformat_qiime_greengenes85.sh" } - 'greengenes2022' { + 'greengenes2' { + title = "Greengenes2 16S - Version 2022.10" + file = [ "http://ftp.microbio.me/greengenes_release/2022.10/2022.10.seqs.fna.gz", "http://ftp.microbio.me/greengenes_release/2022.10/2022.10.taxonomy.md5.tsv.gz" ] + citation = "McDonald, D., Jiang, Y., Balaban, M. et al. Greengenes2 unifies microbial data in a single reference tree. Nat Biotechnol (2023). https://doi.org/10.1038/s41587-023-01845-1" + fmtscript = "taxref_reformat_qiime_greengenes2022.sh" + } + 'greengenes2=2022.10' { title = "Greengenes2 16S - Version 2022.10" file = [ "http://ftp.microbio.me/greengenes_release/2022.10/2022.10.seqs.fna.gz", "http://ftp.microbio.me/greengenes_release/2022.10/2022.10.taxonomy.md5.tsv.gz" ] citation = "McDonald, D., Jiang, Y., Balaban, M. et al. Greengenes2 unifies microbial data in a single reference tree. Nat Biotechnol (2023). https://doi.org/10.1038/s41587-023-01845-1" diff --git a/nextflow_schema.json b/nextflow_schema.json index 0f00790f..29aee21e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -443,7 +443,8 @@ "unite-alleuk=8.2", "unite-alleuk", "greengenes85", - "greengenes2022" + "greengenes2", + "greengenes2=2022.10" ] }, "qiime_ref_tax_custom": { From 70b2e01d100e6d015c4cf52236200a3a7c90aff9 Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 11 Jan 2024 16:20:06 +0100 Subject: [PATCH 094/104] activate multi-cpu use in QIIME2_EXTRACT --- conf/base.config | 5 +++++ modules/local/qiime2_extract.nf | 5 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/conf/base.config b/conf/base.config index c628a10c..c16be532 100644 --- a/conf/base.config +++ b/conf/base.config @@ -63,4 +63,9 @@ process { withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } + withName:QIIME2_EXTRACT { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 
24.h * task.attempt, 'time' ) } + } } diff --git a/modules/local/qiime2_extract.nf b/modules/local/qiime2_extract.nf index f3a61b6e..7ff383fd 100644 --- a/modules/local/qiime2_extract.nf +++ b/modules/local/qiime2_extract.nf @@ -1,7 +1,5 @@ process QIIME2_EXTRACT { tag "${meta.FW_primer}-${meta.RV_primer}" - label 'process_low' - label 'single_cpu' container "qiime2/core:2023.7" @@ -20,6 +18,7 @@ process QIIME2_EXTRACT { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { error "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead." } + def args = task.ext.args ?: '' """ export XDG_CONFIG_HOME="./xdgconfig" export MPLCONFIGDIR="./mplconfigdir" @@ -37,9 +36,11 @@ process QIIME2_EXTRACT { --output-path ref-taxonomy.qza #Extract sequences based on primers qiime feature-classifier extract-reads \\ + --p-n-jobs ${task.cpus} \\ --i-sequences ref-seq.qza \\ --p-f-primer ${meta.FW_primer} \\ --p-r-primer ${meta.RV_primer} \\ + $args \\ --o-reads ${meta.FW_primer}-${meta.RV_primer}-ref-seq.qza \\ --quiet From 54e3d602af89c7304045e047d562e071c4bb5e13 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 12 Jan 2024 09:24:55 +0100 Subject: [PATCH 095/104] fix conda package in FILTER_SSU --- CHANGELOG.md | 1 + modules/local/filter_ssu.nf | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a565e81..c45a1b96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#679](https://github.com/nf-core/ampliseq/pull/679) - Prevent masking low complexity regions by VSEARCH with lower case letters - [#680](https://github.com/nf-core/ampliseq/pull/680) - Improved pipeline summary report & error messages - [#683](https://github.com/nf-core/ampliseq/pull/683) - Template update for nf-core/tools version 2.11 +- [#687](https://github.com/nf-core/ampliseq/pull/687) - Correct conda package for ASV SSU 
filtering ### `Dependencies` diff --git a/modules/local/filter_ssu.nf b/modules/local/filter_ssu.nf index 5b3c623c..314a63c7 100644 --- a/modules/local/filter_ssu.nf +++ b/modules/local/filter_ssu.nf @@ -2,7 +2,7 @@ process FILTER_SSU { tag "${fasta}" label 'process_low' - conda "bioconductor::biostrings=2.58.0" + conda "bioconda::bioconductor-biostrings=2.58.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bioconductor-biostrings:2.58.0--r40h037d062_0' : 'biocontainers/bioconductor-biostrings:2.58.0--r40h037d062_0' }" From 06dcda77d883da693c0ce3dd916a3428d0b13158 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 12 Jan 2024 10:02:53 +0100 Subject: [PATCH 096/104] update docs --- CHANGELOG.md | 5 ++--- CITATIONS.md | 20 ++++++++++++++++---- docs/usage.md | 4 +++- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a565e81..3fb8d139 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,11 +19,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` -- [#672](https://github.com/nf-core/ampliseq/pull/672) - Update output docs for collapsed abundance tables -- [#673](https://github.com/nf-core/ampliseq/pull/673) - Fix logic relating to generation of qiime2 taxonomy part of summary report +- [#672](https://github.com/nf-core/ampliseq/pull/672),[#688](https://github.com/nf-core/ampliseq/pull/688) - Updated documentation - [#676](https://github.com/nf-core/ampliseq/pull/676) - Phyloseq sometimes only produced one of multiple output files - [#679](https://github.com/nf-core/ampliseq/pull/679) - Prevent masking low complexity regions by VSEARCH with lower case letters -- [#680](https://github.com/nf-core/ampliseq/pull/680) - Improved pipeline summary report & error messages +- [#680](https://github.com/nf-core/ampliseq/pull/680),[#673](https://github.com/nf-core/ampliseq/pull/673) - 
Improved pipeline summary report & error messages - [#683](https://github.com/nf-core/ampliseq/pull/683) - Template update for nf-core/tools version 2.11 ### `Dependencies` diff --git a/CITATIONS.md b/CITATIONS.md index ee03b01c..7c80b906 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -41,6 +41,10 @@ > Quast C, Pruesse E, Yilmaz P, Gerken J, Schweer T, Yarza P, Peplies J, Glöckner FO. The SILVA ribosomal RNA gene database project: improved data processing and web-based tools. Nucleic Acids Res. 2013 Jan;41(Database issue):D590-6. doi: 10.1093/nar/gks1219. Epub 2012 Nov 28. PMID: 23193283; PMCID: PMC3531112. +- [Greengenes2](https://doi.org/10.1038/s41587-023-01845-1) + + > McDonald, D., Jiang, Y., Balaban, M. et al. Greengenes2 unifies microbial data in a single reference tree. Nat Biotechnol (2023). https://doi.org/10.1038/s41587-023-01845-1 + - [PR2 - Protist Reference Ribosomal Database](https://pubmed.ncbi.nlm.nih.gov/23193267/) > Guillou L, Bachar D, Audic S, Bass D, Berney C, Bittner L, Boutte C, Burgaud G, de Vargas C, Decelle J, Del Campo J, Dolan JR, Dunthorn M, Edvardsen B, Holzmann M, Kooistra WH, Lara E, Le Bescot N, Logares R, Mahé F, Massana R, Montresor M, Morard R, Not F, Pawlowski J, Probert I, Sauvadet AL, Siano R, Stoeck T, Vaulot D, Zimmermann P, Christen R. The Protist Ribosomal Reference database (PR2): a catalog of unicellular eukaryote small sub-unit rRNA sequences with curated taxonomy. Nucleic Acids Res. 2013 Jan;41(Database issue):D597-604. doi: 10.1093/nar/gks1160. Epub 2012 Nov 27. PMID: 23193267; PMCID: PMC3531120. @@ -61,13 +65,21 @@ > Kõljalg U, Larsson KH, Abarenkov K, Nilsson RH, Alexander IJ, Eberhardt U, Erland S, Høiland K, Kjøller R, Larsson E, Pennanen T, Sen R, Taylor AF, Tedersoo L, Vrålstad T, Ursing BM. UNITE: a database providing web-based methods for the molecular identification of ectomycorrhizal fungi. New Phytol. 2005 Jun;166(3):1063-8. doi: 10.1111/j.1469-8137.2005.01376.x. PMID: 15869663. 
- - [MIDORI2 - a collection of reference databases](https://doi.org/10.1002/edn3.303/) +- [MIDORI2 - a collection of reference databases](https://doi.org/10.1002/edn3.303/) + + > Leray, M., Knowlton, N., & Machida, R. J. (2022). MIDORI2: A collection of quality controlled, preformatted, and regularly updated reference databases for taxonomic assignment of eukaryotic mitochondrial sequences. Environmental DNA, 4, 894– 907. https://doi.org/10.1002/edn3.303. + +- [COIDB - CO1 Taxonomy Database](https://doi.org/10.17044/scilifelab.20514192.v2) + + > Sundh J, Manoharan L, Iwaszkiewicz-Eggebrecht E, Miraldo A, Andersson A, Ronquist F. COI reference sequences from BOLD DB. doi: https://doi.org/10.17044/scilifelab.20514192.v2. + +- [PhytoRef plastid 16S rRNA database for photosynthetic eukaryotes](https://pubmed.ncbi.nlm.nih.gov/25740460/) - > Leray, M., Knowlton, N., & Machida, R. J. (2022). MIDORI2: A collection of quality controlled, preformatted, and regularly updated reference databases for taxonomic assignment of eukaryotic mitochondrial sequences. Environmental DNA, 4, 894– 907. https://doi.org/10.1002/edn3.303. + > Decelle J, Romac S, Stern RF, Bendif el M, Zingone A, Audic S, Guiry MD, Guillou L, Tessier D, Le Gall F, Gourvil P, Dos Santos AL, Probert I, Vaulot D, de Vargas C, Christen R. PhytoREF: a reference database of the plastidial 16S rRNA gene of photosynthetic eukaryotes with curated taxonomy. Mol Ecol Resour. 2015 Nov;15(6):1435-45. doi: 10.1111/1755-0998.12401. Epub 2015 Apr 6. PMID: 25740460. - - [COIDB - CO1 Taxonomy Database](https://doi.org/10.17044/scilifelab.20514192.v2) +- [Zehr lab nifH database](http://doi.org/10.5281/zenodo.7996213) - > Sundh J, Manoharan L, Iwaszkiewicz-Eggebrecht E, Miraldo A, Andersson A, Ronquist F. COI reference sequences from BOLD DB. doi: https://doi.org/10.17044/scilifelab.20514192.v2. + > M. A. Moynihan & C. Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. 
http://doi.org/10.5281/zenodo.7996213 ### Phylogenetic placement diff --git a/docs/usage.md b/docs/usage.md index 779aa846..acf62a37 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -231,6 +231,8 @@ Pre-configured reference taxonomy databases are: | unite-alleuk | + | + | - | + | eukaryotic nuclear ribosomal ITS region | | coidb | + | + | - | - | eukaryotic Cytochrome Oxidase I (COI) | | midori2-co1 | + | - | - | - | eukaryotic Cytochrome Oxidase I (COI) | +| phytoref | + | - | - | - | eukaryotic plastid 16S rRNA | +| zehr-nifh | + | - | - | - | Nitrogenase iron protein NifH | | standard | - | - | + | - | any in genomes of archaea, bacteria, viruses³ | ¹[`--dada_taxonomy_rc`](https://nf-co.re/ampliseq/parameters#dada_taxonomy_rc) is recommended; ²: de-replicated at 85%, only for testing purposes; ³: quality of results might vary @@ -242,7 +244,7 @@ Special features of taxonomic classification tools: - QIIME2's reference taxonomy databases will have regions matching the amplicon extracted with primer sequences. - DADA2, Kraken2, and QIIME2 have specific parameters to accept custom databases (but theoretically possible with all classifiers) -Parameter guidance is given in [nf-core/ampliseq website parameter documentation](https://nf-co.re/ampliseq/parameters/#taxonomic-database). +Parameter guidance is given in [nf-core/ampliseq website parameter documentation](https://nf-co.re/ampliseq/parameters/#taxonomic-database). Citations are listed in [`CITATIONS.md`](CITATIONS.md). ### Metadata From 9dbb3531423128282ed9732a812153a633ac0ba8 Mon Sep 17 00:00:00 2001 From: Daniel Straub <42973691+d4straub@users.noreply.github.com> Date: Fri, 12 Jan 2024 10:19:25 +0100 Subject: [PATCH 097/104] Apply suggestions from code review Co-authored-by: Till E. 
<64961761+tillenglert@users.noreply.github.com> --- CITATIONS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CITATIONS.md b/CITATIONS.md index 7c80b906..73e92bc0 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -67,7 +67,7 @@ - [MIDORI2 - a collection of reference databases](https://doi.org/10.1002/edn3.303/) - > Leray, M., Knowlton, N., & Machida, R. J. (2022). MIDORI2: A collection of quality controlled, preformatted, and regularly updated reference databases for taxonomic assignment of eukaryotic mitochondrial sequences. Environmental DNA, 4, 894– 907. https://doi.org/10.1002/edn3.303. + > Leray, M., Knowlton, N., & Machida, R. J. (2022). MIDORI2: A collection of quality controlled, preformatted, and regularly updated reference databases for taxonomic assignment of eukaryotic mitochondrial sequences. Environmental DNA, 4, 894– 907. doi: https://doi.org/10.1002/edn3.303. - [COIDB - CO1 Taxonomy Database](https://doi.org/10.17044/scilifelab.20514192.v2) @@ -79,7 +79,7 @@ - [Zehr lab nifH database](http://doi.org/10.5281/zenodo.7996213) - > M. A. Moynihan & C. Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. http://doi.org/10.5281/zenodo.7996213 + > M. A. Moynihan & C. Furbo Reeder 2023. nifHdada2 GitHub repository, v2.0.5. Zenodo. 
doi: http://doi.org/10.5281/zenodo.7996213 ### Phylogenetic placement From 08cb7f06ed557c12ede3163c33cb467eed357037 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 12 Jan 2024 10:37:53 +0100 Subject: [PATCH 098/104] bump version to 2.8.0 --- CHANGELOG.md | 2 +- assets/multiqc_config.yml | 4 ++-- nextflow.config | 2 +- tests/pipeline/doubleprimers.nf.test.snap | 2 +- tests/pipeline/fasta.nf.test.snap | 2 +- tests/pipeline/iontorrent.nf.test.snap | 2 +- tests/pipeline/multi.nf.test.snap | 2 +- tests/pipeline/novaseq.nf.test.snap | 2 +- tests/pipeline/pacbio_its.nf.test.snap | 2 +- tests/pipeline/pplace.nf.test.snap | 2 +- tests/pipeline/qiimecustom.nf.test.snap | 2 +- tests/pipeline/reftaxcustom.nf.test.snap | 2 +- tests/pipeline/single.nf.test.snap | 2 +- tests/pipeline/sintax.nf.test.snap | 2 +- tests/pipeline/test.nf.test.snap | 2 +- 15 files changed, 16 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e9e7584..f58c1c96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## nf-core/ampliseq version 2.8.0dev +## nf-core/ampliseq version 2.8.0 - 2024-01-16 ### `Added` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index eaf9f190..af96c9d1 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/ampliseq + This report has been generated by the nf-core/ampliseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. 
report_section_order: "nf-core-ampliseq-methods-description": order: -1000 diff --git a/nextflow.config b/nextflow.config index cde6082a..903eda54 100644 --- a/nextflow.config +++ b/nextflow.config @@ -338,7 +338,7 @@ manifest { description = """Amplicon sequencing analysis workflow using DADA2 and QIIME2""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.8.0dev' + version = '2.8.0' doi = '10.5281/zenodo.1493841' } diff --git a/tests/pipeline/doubleprimers.nf.test.snap b/tests/pipeline/doubleprimers.nf.test.snap index bfa9d45a..7cb5f68a 100644 --- a/tests/pipeline/doubleprimers.nf.test.snap +++ b/tests/pipeline/doubleprimers.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-07-27T13:49:03+0000" }, diff --git a/tests/pipeline/fasta.nf.test.snap b/tests/pipeline/fasta.nf.test.snap index db862e0f..c049085c 100644 --- a/tests/pipeline/fasta.nf.test.snap +++ b/tests/pipeline/fasta.nf.test.snap @@ -7,7 +7,7 @@ }, "software_versions": { "content": [ - 
"{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-05-28T21:06:17+0000" }, diff --git a/tests/pipeline/iontorrent.nf.test.snap b/tests/pipeline/iontorrent.nf.test.snap index 61d7a616..989cbd59 100644 --- a/tests/pipeline/iontorrent.nf.test.snap +++ b/tests/pipeline/iontorrent.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-06-20T01:42:35+0000" }, diff --git a/tests/pipeline/multi.nf.test.snap b/tests/pipeline/multi.nf.test.snap index 730ba6a4..913b3818 100644 --- a/tests/pipeline/multi.nf.test.snap +++ b/tests/pipeline/multi.nf.test.snap @@ -14,7 +14,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, 
CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-05-28T21:15:03+0000" }, diff --git a/tests/pipeline/novaseq.nf.test.snap b/tests/pipeline/novaseq.nf.test.snap index 0b4abc88..e6c17e7f 100644 --- a/tests/pipeline/novaseq.nf.test.snap +++ b/tests/pipeline/novaseq.nf.test.snap @@ -7,7 +7,7 @@ }, "software_versions": { "content": [ - "{CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CODONS={pandas=1.1.5, python=3.9.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-06-20T00:10:02+0000" }, diff --git a/tests/pipeline/pacbio_its.nf.test.snap b/tests/pipeline/pacbio_its.nf.test.snap index c6975e2c..3cbdef01 100644 --- 
a/tests/pipeline/pacbio_its.nf.test.snap +++ b/tests/pipeline/pacbio_its.nf.test.snap @@ -35,7 +35,7 @@ }, "software_versions": { "content": [ - "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-06-20T02:07:02+0000" }, diff --git a/tests/pipeline/pplace.nf.test.snap b/tests/pipeline/pplace.nf.test.snap index 3e23ab63..c8c3a9f1 100644 --- a/tests/pipeline/pplace.nf.test.snap +++ b/tests/pipeline/pplace.nf.test.snap @@ -8,7 +8,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, 
GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-06-20T17:24:03+0000" }, diff --git a/tests/pipeline/qiimecustom.nf.test.snap b/tests/pipeline/qiimecustom.nf.test.snap index e7628647..5f758fd4 100644 --- a/tests/pipeline/qiimecustom.nf.test.snap +++ b/tests/pipeline/qiimecustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, 
FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 2e591854..4bfd8c2d 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, KRAKEN2_KRAKEN2={kraken2=2.1.2, pigz=2.6}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, 
TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, diff --git a/tests/pipeline/single.nf.test.snap b/tests/pipeline/single.nf.test.snap index 7e0bf619..1b6e33c0 100644 --- a/tests/pipeline/single.nf.test.snap +++ b/tests/pipeline/single.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-05-28T20:35:33+0000" }, diff --git a/tests/pipeline/sintax.nf.test.snap b/tests/pipeline/sintax.nf.test.snap index f2d01ce0..fed045a4 100644 --- a/tests/pipeline/sintax.nf.test.snap +++ b/tests/pipeline/sintax.nf.test.snap @@ -16,7 +16,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, 
VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-06-20T16:40:18+0000" }, diff --git a/tests/pipeline/test.nf.test.snap b/tests/pipeline/test.nf.test.snap index 2d29c03f..8441f2a5 100644 --- a/tests/pipeline/test.nf.test.snap +++ b/tests/pipeline/test.nf.test.snap @@ -22,7 +22,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CLUSTERS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, VSEARCH_CLUSTER={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.12.0, yaml=6.0.1}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.3.1, dada2=1.28.0}, DADA2_FILTNTRIM={R=4.3.1, dada2=1.28.0}, DADA2_QUALITY1={R=4.3.1, ShortRead=1.58.0, dada2=1.28.0}, DADA2_TAXONOMY={R=4.3.1, dada2=1.28.0}, FASTQC={fastqc=0.12.1}, FILTER_CLUSTERS={pandas=1.1.5, python=3.9.1}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, 
FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2023.7.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, VSEARCH_CLUSTER={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.8.0}}" ], "timestamp": "2023-05-28T20:55:32+0000" }, From 24ab70982bfb6118f0f6251c453756de18c00f72 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 12 Jan 2024 10:40:59 +0100 Subject: [PATCH 099/104] fix prettier --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f58c1c96..6835818d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## nf-core/ampliseq version 2.8.0 - 2024-01-16 +## nf-core/ampliseq version 2.8.0 - 2024-01-16 ### `Added` From 0be678e6671585dd0a82a410c460eef40014891e Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 15 Jan 2024 10:10:29 +0100 Subject: [PATCH 100/104] update parameter help --- nextflow_schema.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 29aee21e..84859d19 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -16,7 +16,7 @@ "mimetype": "text/tsv", "fa_icon": "fas fa-dna", "description": "Path to tab-separated sample sheet", - "help_text": "Path to sample sheet, either tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml), that points to compressed fastq files.\n\nThe sample sheet must have two to four tab-separated columns/entries with the following headers: \n- `sampleID` (required): Unique sample IDs, must start with a letter, and can only contain letters, numbers or underscores\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads zipped 
FastQ files, required if the data is paired-end\n- `run` (optional): If the data was produced by multiple sequencing runs, any string\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)", + "help_text": "Path to sample sheet, either tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml), that points to compressed fastq files.\n\nThe sample sheet must have two to four tab-separated columns/entries with the following headers: \n- `sampleID` (required): Unique sample IDs, must start with a letter, and can only contain letters, numbers or underscores\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads zipped FastQ files, required if the data is paired-end\n- `run` (optional): If the data was produced by multiple sequencing runs, any string\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- Choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)", "schema": "assets/schema_input.json" }, "input_fasta": { @@ -24,14 +24,14 @@ "mimetype": "text/tsv", "fa_icon": "fas fa-dna", "description": "Path to ASV/OTU fasta file", - "help_text": "Path to fasta format file with sequences that will be taxonomically classified. 
The fasta file input option can be used to taxonomically classify previously produced ASV/OTU sequences.\n\nThe fasta sequence header line may contain a description, that will be kept as part of the sequence name. However, tabs will be changed into spaces.\n\nRelated parameters are:\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)" + "help_text": "Path to fasta format file with sequences that will be taxonomically classified. The fasta file input option can be used to taxonomically classify previously produced ASV/OTU sequences.\n\nThe fasta sequence header line may contain a description, that will be kept as part of the sequence name. However, tabs will be changed into spaces.\n\nRelated parameters are:\n- Choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)" }, "input_folder": { "type": "string", "mimetype": "text/tsv", "fa_icon": "fas fa-dna", "description": "Path to folder containing zipped FastQ files", - "help_text": "Path to folder containing compressed fastq files.\n\nExample for input data organization from one sequencing run with two samples, paired-end data:\n\n```bash\ndata\n \u251c\u2500sample1_1_L001_R1_001.fastq.gz\n \u251c\u2500sample1_1_L001_R2_001.fastq.gz\n \u251c\u2500sample2_1_L001_R1_001.fastq.gz\n \u2514\u2500sample2_1_L001_R2_001.fastq.gz\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The folder must contain gzip compressed demultiplexed fastq files. If the file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`), please check `--extension`.\n3. Sample identifiers are extracted from file names, i.e. the string before the first underscore `_`, these must be unique\n4. 
If your data is scattered, produce a sample sheet\n5. All sequencing data should originate from one sequencing run, because processing relies on run-specific error models that are unreliable when data from several sequencing runs are mixed. Sequencing data originating from multiple sequencing runs requires additionally the parameter `--multiple_sequencing_runs` and a specific folder structure.\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--multiple_sequencing_runs` if the sequencing data originates from multiple sequencing runs\n- `--extension` if the sequencing file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`)\n- `--dada_ref_taxonomy`, `--qiime_ref_taxonomy`, and/or `--sintax_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)" + "help_text": "Path to folder containing compressed fastq files.\n\nExample for input data organization from one sequencing run with two samples, paired-end data:\n\n```bash\ndata\n \u251c\u2500sample1_1_L001_R1_001.fastq.gz\n \u251c\u2500sample1_1_L001_R2_001.fastq.gz\n \u251c\u2500sample2_1_L001_R1_001.fastq.gz\n \u2514\u2500sample2_1_L001_R2_001.fastq.gz\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The folder must contain gzip compressed demultiplexed fastq files. If the file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`), please check `--extension`.\n3. Sample identifiers are extracted from file names, i.e. the string before the first underscore `_`, these must be unique\n4. If your data is scattered, produce a sample sheet\n5. 
All sequencing data should originate from one sequencing run, because processing relies on run-specific error models that are unreliable when data from several sequencing runs are mixed. Sequencing data originating from multiple sequencing runs requires additionally the parameter `--multiple_sequencing_runs` and a specific folder structure.\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--multiple_sequencing_runs` if the sequencing data originates from multiple sequencing runs\n- `--extension` if the sequencing file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`)\n- Choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: DADA2 assignTaxonomy and 16S rRNA sequence database)" }, "FW_primer": { "type": "string", @@ -372,7 +372,7 @@ }, "dada_ref_tax_custom": { "type": "string", - "help_text": "Is preferred over `--dada_ref_taxonomy`. Either `--skip_dada_addspecies` (no species annotation) or `--dada_ref_tax_custom_sp` (species annotation) is additionally required. Consider also setting `--dada_assign_taxlevels`.\n\nMust be compatible to DADA2's assignTaxonomy function: 'Can be compressed. This reference fasta file should be formatted so that the id lines correspond to the taxonomy (or classification) of the associated sequence, and each taxonomic level is separated by a semicolon.' See also https://rdrr.io/bioc/dada2/man/assignTaxonomy.html", + "help_text": "Overwrites `--dada_ref_taxonomy`. Either `--skip_dada_addspecies` (no species annotation) or `--dada_ref_tax_custom_sp` (species annotation) is additionally required. Consider also setting `--dada_assign_taxlevels`.\n\nMust be compatible to DADA2's assignTaxonomy function: 'Can be compressed. 
This reference fasta file should be formatted so that the id lines correspond to the taxonomy (or classification) of the associated sequence, and each taxonomic level is separated by a semicolon.' See also https://rdrr.io/bioc/dada2/man/assignTaxonomy.html", "description": "Path to a custom DADA2 reference taxonomy database" }, "dada_ref_tax_custom_sp": { @@ -449,7 +449,7 @@ }, "qiime_ref_tax_custom": { "type": "string", - "help_text": "Is preferred over `--qiime_ref_taxonomy`. A comma separated pair of (possibly gzipped) filepaths (sequence, taxonomy).", + "help_text": "Overwrites `--qiime_ref_taxonomy`. A comma separated pair of (possibly gzipped) filepaths (sequence, taxonomy).", "description": "Path to files of a custom QIIME2 reference taxonomy database (files may be gzipped)" }, "classifier": { @@ -475,7 +475,7 @@ }, "kraken2_ref_tax_custom": { "type": "string", - "help_text": "Is preferred over `--kraken2_ref_taxonomy`. Consider also setting `--kraken2_assign_taxlevels`. Can be compressed tar archive (.tar.gz|.tgz) or folder containing the database. See also https://benlangmead.github.io/aws-indexes/k2.", + "help_text": "Overwrites `--kraken2_ref_taxonomy`. Consider also setting `--kraken2_assign_taxlevels`. Can be compressed tar archive (.tar.gz|.tgz) or folder containing the database. 
See also https://benlangmead.github.io/aws-indexes/k2.", "description": "Path to a custom Kraken2 reference taxonomy database (*.tar.gz|*.tgz archive or folder)" }, "kraken2_assign_taxlevels": { From 678ce1aa94fabe66aff6a0c0189282780c0fe864 Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 15 Jan 2024 10:32:05 +0100 Subject: [PATCH 101/104] update qiime_ref_tax_custom help text --- nextflow_schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 84859d19..938690d4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -449,8 +449,8 @@ }, "qiime_ref_tax_custom": { "type": "string", - "help_text": "Overwrites `--qiime_ref_taxonomy`. A comma separated pair of (possibly gzipped) filepaths (sequence, taxonomy).", - "description": "Path to files of a custom QIIME2 reference taxonomy database (files may be gzipped)" + "help_text": "Overwrites `--qiime_ref_taxonomy`. Either path to tarball (`*.tar.gz` or `*.tgz`) that contains sequence (`*.fna`) and taxonomy (`*.tax`) data, or alternatively a comma separated pair of filepaths to sequence (`*.fna`) and taxonomy (`*.tax`) data (possibly gzipped `*.gz`).", + "description": "Path to files of a custom QIIME2 reference taxonomy database (tarball, or two comma-separated files)" }, "classifier": { "type": "string", From 5ad866f15c31ee3b3d56ad7a2c57a28aff8121d1 Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 15 Jan 2024 10:46:46 +0100 Subject: [PATCH 102/104] change qiime_ref_tax_custom test files to nf-core --- CHANGELOG.md | 4 ++-- conf/test_qiimecustom.config | 2 +- conf/test_reftaxcustom.config | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6835818d..402d8f72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#666](https://github.com/nf-core/ampliseq/pull/666) - Added Greengenes2 
database, version 2022.10, support for QIIME2 taxonomic classification. -- [#667](https://github.com/nf-core/ampliseq/pull/667) - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification +- [#667](https://github.com/nf-core/ampliseq/pull/667),[#691](https://github.com/nf-core/ampliseq/pull/691) - Added `--qiime_ref_tax_custom` to permit custom reference database for QIIME2 taxonomic classification - [#674](https://github.com/nf-core/ampliseq/pull/674) - Add PhytoRef database for DADA2 taxonomy assignment using `--dada_ref_taxonomy phytoref` - [#675](https://github.com/nf-core/ampliseq/pull/675) - Add the Zehr lab nifH database for DADA2 taxonomy assignment using `--dada_ref_taxonomy zehr-nifh` - [#681](https://github.com/nf-core/ampliseq/pull/681) - For DADA2, with `--dada_addspecies_allowmultiple` multiple exact species matches are reported and with `--dada_taxonomy_rc` reverse-complement matches are also considered in taxonomic classification @@ -19,7 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` -- [#672](https://github.com/nf-core/ampliseq/pull/672),[#688](https://github.com/nf-core/ampliseq/pull/688) - Updated documentation +- [#672](https://github.com/nf-core/ampliseq/pull/672),[#688](https://github.com/nf-core/ampliseq/pull/6),[#691](https://github.com/nf-core/ampliseq/pull/691) - Updated documentation - [#676](https://github.com/nf-core/ampliseq/pull/676) - Phyloseq sometimes only produced one of multiple output files - [#679](https://github.com/nf-core/ampliseq/pull/679) - Prevent masking low complexity regions by VSEARCH with lower case letters - [#680](https://github.com/nf-core/ampliseq/pull/680),[#673](https://github.com/nf-core/ampliseq/pull/673) - Improved pipeline summary report & error messages diff --git a/conf/test_qiimecustom.config b/conf/test_qiimecustom.config index 2fc9cb73..dd02eb4e 100644 --- a/conf/test_qiimecustom.config +++ 
b/conf/test_qiimecustom.config @@ -25,7 +25,7 @@ params { input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet.tsv" // Custom reference taxonomy - qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.fna.gz,https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tax.gz" + qiime_ref_tax_custom = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/testdata/85_greengenes.fna.gz,https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/testdata/85_greengenes.tax.gz" // Skip downstream analysis with QIIME2 skip_qiime_downstream = true diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config index 1afe1c2d..40408bfb 100644 --- a/conf/test_reftaxcustom.config +++ b/conf/test_reftaxcustom.config @@ -30,7 +30,7 @@ params { dada_assign_taxlevels = "Kingdom,Phylum,Class,Order,Family,Genus" kraken2_ref_tax_custom = "https://genome-idx.s3.amazonaws.com/kraken/16S_Greengenes13.5_20200326.tgz" kraken2_assign_taxlevels = "D,P,C,O" - qiime_ref_tax_custom = "https://raw.githubusercontent.com/MatthewJM96/test-datasets/ampliseq/testdata/db/85_greengenes.tar.gz" + qiime_ref_tax_custom = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/testdata/85_greengenes.tar.gz" // Skip downstream analysis with QIIME2 skip_qiime_downstream = true From 4694510a8f9d0846217f34d5cb27c161a7f916d4 Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 15 Jan 2024 12:51:23 +0100 Subject: [PATCH 103/104] replace local gzip with nf-core module pigz/uncompress --- modules.json | 5 ++ modules/local/gzip_decompress.nf | 32 ------------- modules/nf-core/pigz/uncompress/main.nf | 48 +++++++++++++++++++ modules/nf-core/pigz/uncompress/meta.yml | 32 +++++++++++++ .../pigz/uncompress/tests/main.nf.test | 33 +++++++++++++ .../pigz/uncompress/tests/main.nf.test.snap | 21 ++++++++ 
.../nf-core/pigz/uncompress/tests/tags.yml | 2 + subworkflows/local/qiime2_preptax.nf | 8 ++-- 8 files changed, 145 insertions(+), 36 deletions(-) delete mode 100644 modules/local/gzip_decompress.nf create mode 100644 modules/nf-core/pigz/uncompress/main.nf create mode 100644 modules/nf-core/pigz/uncompress/meta.yml create mode 100644 modules/nf-core/pigz/uncompress/tests/main.nf.test create mode 100644 modules/nf-core/pigz/uncompress/tests/main.nf.test.snap create mode 100644 modules/nf-core/pigz/uncompress/tests/tags.yml diff --git a/modules.json b/modules.json index 6969dc1d..595c024f 100644 --- a/modules.json +++ b/modules.json @@ -81,6 +81,11 @@ "git_sha": "4ab13872435962dadc239979554d13709e20bf29", "installed_by": ["modules"] }, + "pigz/uncompress": { + "branch": "master", + "git_sha": "4ef7becf6a2bbc8df466885d10b4051d1f318a6a", + "installed_by": ["modules"] + }, "untar": { "branch": "master", "git_sha": "d0b4fc03af52a1cc8c6fb4493b921b57352b1dd8", diff --git a/modules/local/gzip_decompress.nf b/modules/local/gzip_decompress.nf deleted file mode 100644 index c6ea37a5..00000000 --- a/modules/local/gzip_decompress.nf +++ /dev/null @@ -1,32 +0,0 @@ -process GZIP_DECOMPRESS { - tag "$file" - label 'process_single' - - conda "conda-forge::sed=4.7 conda-forge::gzip=1.13" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" - - input: - path(file) - - output: - path("$outfile"), emit: ungzip - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - outfile = task.ext.outfile ?: file.baseName.toString().replaceFirst(/\.gz$/, "") - - """ - gzip $args -c -d $file > $outfile - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gzip: \$(echo \$(gzip --version 2>&1) | sed 's/gzip //; s/ Copyright.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/pigz/uncompress/main.nf b/modules/nf-core/pigz/uncompress/main.nf new file mode 100644 index 00000000..9383c146 --- /dev/null +++ b/modules/nf-core/pigz/uncompress/main.nf @@ -0,0 +1,48 @@ +process PIGZ_UNCOMPRESS { + label 'process_low' + //stageInMode 'copy' // this directive can be set in case the original input should be kept + + conda "conda-forge::pigz" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pigz:2.8': + 'biocontainers/pigz:2.8' }" + + input: + path zip + + output: + path "${uncompressed_filename}" , emit: file + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + uncompressed_filename = zip.toString() - '.gz' + // calling pigz -f to make it follow symlinks + """ + unpigz \\ + -p $task.cpus \\ + -fk \\ + $args \\ + ${zip} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + uncompressed_filename = zip.toString() - '.gz' + """ + touch ${zip.dropRight(3)} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/nf-core/pigz/uncompress/meta.yml b/modules/nf-core/pigz/uncompress/meta.yml new file mode 100644 index 00000000..574a004b --- /dev/null +++ b/modules/nf-core/pigz/uncompress/meta.yml @@ -0,0 +1,32 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "pigz_uncompress" +description: write your description here +keywords: + - uncompress + - gzip + - parallelized +tools: + - "pigz": + description: "Parallel implementation of the gzip algorithm." 
+ homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" + +input: + - zip: + type: file + description: Gzipped file + pattern: "*.{gzip}" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - file: + type: file + description: File to compress + pattern: "*" + +authors: + - "@lrauschning" diff --git a/modules/nf-core/pigz/uncompress/tests/main.nf.test b/modules/nf-core/pigz/uncompress/tests/main.nf.test new file mode 100644 index 00000000..57955658 --- /dev/null +++ b/modules/nf-core/pigz/uncompress/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process PIGZ_UNCOMPRESS" + script "modules/nf-core/pigz/uncompress/main.nf" + process "PIGZ_UNCOMPRESS" + tag "modules" + tag "modules_nfcore" + tag "pigz" + tag "pigz/uncompress" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap b/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap new file mode 100644 index 00000000..038cf2d7 --- /dev/null +++ b/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap @@ -0,0 +1,21 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ], + "1": [ + "versions.yml:md5,a2d5ce72baa8b303f25afb9cf094f683" + ], + "file": [ + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ], + "versions": [ + "versions.yml:md5,a2d5ce72baa8b303f25afb9cf094f683" + ] + } + ], + "timestamp": "2023-10-18T12:37:21.987858" + } +} \ No newline at end of file diff --git a/modules/nf-core/pigz/uncompress/tests/tags.yml b/modules/nf-core/pigz/uncompress/tests/tags.yml new file mode 100644 index 
00000000..6719a90a --- /dev/null +++ b/modules/nf-core/pigz/uncompress/tests/tags.yml @@ -0,0 +1,2 @@ +pigz/uncompress: + - modules/nf-core/pigz/uncompress/** diff --git a/subworkflows/local/qiime2_preptax.nf b/subworkflows/local/qiime2_preptax.nf index dfa28725..ce7bac78 100644 --- a/subworkflows/local/qiime2_preptax.nf +++ b/subworkflows/local/qiime2_preptax.nf @@ -3,7 +3,7 @@ */ include { UNTAR } from '../../modules/nf-core/untar/main' -include { GZIP_DECOMPRESS } from '../../modules/local/gzip_decompress.nf' +include { PIGZ_UNCOMPRESS } from '../../modules/nf-core/pigz/uncompress/main' include { FORMAT_TAXONOMY_QIIME } from '../../modules/local/format_taxonomy_qiime' include { QIIME2_EXTRACT } from '../../modules/local/qiime2_extract' include { QIIME2_TRAIN } from '../../modules/local/qiime2_train' @@ -29,10 +29,10 @@ workflow QIIME2_PREPTAX { }.set { ch_qiime_ref_tax_branched } ch_qiime_ref_tax_branched.failed.subscribe { error "$it is neither a compressed (ends with `.gz`) or decompressed sequence (ends with `.fna`) or taxonomy file (ends with `.tax`). Please review input." 
} - GZIP_DECOMPRESS(ch_qiime_ref_tax_branched.compressed) - ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(GZIP_DECOMPRESS.out.versions) + PIGZ_UNCOMPRESS(ch_qiime_ref_tax_branched.compressed) + ch_qiime2_preptax_versions = ch_qiime2_preptax_versions.mix(PIGZ_UNCOMPRESS.out.versions) - ch_qiime_db_files = GZIP_DECOMPRESS.out.ungzip + ch_qiime_db_files = PIGZ_UNCOMPRESS.out.file ch_qiime_db_files = ch_qiime_db_files.mix(ch_qiime_ref_tax_branched.decompressed) ch_ref_database_fna = ch_qiime_db_files.filter { From 08353dd6d648fc489b324fa753d91dfd81cf848e Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 15 Jan 2024 12:54:16 +0100 Subject: [PATCH 104/104] fix typo --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 402d8f72..723d4a83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,7 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` -- [#672](https://github.com/nf-core/ampliseq/pull/672),[#688](https://github.com/nf-core/ampliseq/pull/6),[#691](https://github.com/nf-core/ampliseq/pull/691) - Updated documentation +- [#672](https://github.com/nf-core/ampliseq/pull/672),[#688](https://github.com/nf-core/ampliseq/pull/688),[#691](https://github.com/nf-core/ampliseq/pull/691) - Updated documentation - [#676](https://github.com/nf-core/ampliseq/pull/676) - Phyloseq sometimes only produced one of multiple output files - [#679](https://github.com/nf-core/ampliseq/pull/679) - Prevent masking low complexity regions by VSEARCH with lower case letters - [#680](https://github.com/nf-core/ampliseq/pull/680),[#673](https://github.com/nf-core/ampliseq/pull/673) - Improved pipeline summary report & error messages