From 22bef2a979ee38af924c3757b8996e88c3ea0b8e Mon Sep 17 00:00:00 2001 From: Adam Bennett Date: Tue, 8 Aug 2023 10:53:07 +0800 Subject: [PATCH 1/9] Added phyloseq object creation --- CITATIONS.md | 4 ++ bin/reformat_tax_for_phyloseq.py | 32 ++++++++++++++ conf/modules.config | 8 ++++ docs/output.md | 13 ++++++ modules/local/phyloseq.nf | 59 ++++++++++++++++++++++++++ modules/local/phyloseq_inasv.nf | 28 ++++++++++++ modules/local/phyloseq_intax.nf | 29 +++++++++++++ tests/pipeline/iontorrent.nf.test | 3 +- tests/pipeline/multi.nf.test | 3 +- tests/pipeline/pacbio_its.nf.test | 3 +- tests/pipeline/pplace.nf.test | 4 +- tests/pipeline/reftaxcustom.nf.test | 3 +- tests/pipeline/single.nf.test | 3 +- tests/pipeline/sintax.nf.test | 3 +- tests/pipeline/test.nf.test | 4 +- workflows/ampliseq.nf | 66 +++++++++++++++++++++++++++-- 16 files changed, 254 insertions(+), 11 deletions(-) create mode 100755 bin/reformat_tax_for_phyloseq.py create mode 100644 modules/local/phyloseq.nf create mode 100644 modules/local/phyloseq_inasv.nf create mode 100644 modules/local/phyloseq_intax.nf diff --git a/CITATIONS.md b/CITATIONS.md index e488e7bd..44ef54bb 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -109,6 +109,10 @@ > Jari Oksanen, F. Guillaume Blanchet, Michael Friendly, Roeland Kindt, Pierre Legendre, Dan McGlinn, Peter R. Minchin, R. B. O’Hara, Gavin L. Simpson, Peter Solymos, M. Henry H. Stevens, Eduard Szoecs, and Helene Wagner. vegan: Community Ecology Package. 2018. R package version 2.5-3. +- [Phyloseq](https://doi.org/10.1371/journal.pone.0061217) + + > McMurdie PJ, Holmes S (2013). “phyloseq: An R package for reproducible interactive analysis and graphics of microbiome census data.” PLoS ONE, 8(4), e61217. + ### Non-default tools - [ITSx](https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12073) diff --git a/bin/reformat_tax_for_phyloseq.py b/bin/reformat_tax_for_phyloseq.py new file mode 100755 index 00000000..9a3281fb --- /dev/null +++ b/bin/reformat_tax_for_phyloseq.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 + +import pandas as pd +import sys + +tax_file = sys.argv[1] +out_file = sys.argv[2] + +# Import tsv file +tax_df = pd.read_csv(tax_file, sep="\t") + +# The second column should hold the taxonomy information +tax_col = tax_df.columns[1] + +# Split the values in the tax column +split_tax = tax_df[tax_col].str.split(';', expand=True) + +# Assign names to the new columns with an auto incrementing integer +new_col_names = [f'{tax_col}_{i+1}' for i in range(split_tax.shape[1])] +split_tax.columns = new_col_names + +# Strip whitespace from the tax names +split_tax = split_tax.applymap(lambda x: x.strip() if isinstance(x, str) else x) + +# Drop the original tax column +tax_df = tax_df.drop(columns=[tax_col]) + +# Add the new tax columns to the df +result = pd.concat([tax_df, split_tax], axis=1) + +# Create new tsv file +result.to_csv(out_file, sep='\t', index=False) diff --git a/conf/modules.config b/conf/modules.config index 95d8569a..c431e4e0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -785,6 +785,14 @@ process { ] } + withName: 'PHYLOSEQ' { + publishDir = [ + path: { "${params.outdir}/phyloseq" }, + mode: params.publish_dir_mode, + pattern: "*.rds" + ] + } + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, diff --git a/docs/output.md b/docs/output.md index d3d37beb..305e578a 100644 --- a/docs/output.md +++ b/docs/output.md @@ -41,6 +41,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Diversity analysis](#diversity-analysis) - High level overview with different diversity indices - [ANCOM](#ancom) - Differential abundance analysis - [PICRUSt2](#picrust2) - Predict the functional potential of a bacterial community +- [Phyloseq](#phyloseq) - Phyloseq R objects - [Read count report](#read-count-report) - Report of read counts during various steps of the pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution @@ -518,6 +519,18 @@ Most of the fields in the template will not be populated by the export process, +### Phyloseq + +This directory will hold phyloseq objects for each taxonomy table produced by this pipeline. The objects will contain an ASV abundance table and a taxonomy table. If the pipeline is provided with metadata, that metadata will also be included in the phyloseq object. A phylogenetic tree will also be included if the pipeline produces a tree. + +
+Output files + +- `phyloseq/` + - `_phyloseq.rds`: Phyloseq R object. + +
+ ## Read count report This report includes information on how many reads per sample passed each pipeline step in which a loss can occur. Specifically, how many read pairs entered cutadapt, were reverse complemented, passed trimming; how many read pairs entered DADA2, were denoised, merged and non-chimeric; and how many counts were lost during excluding unwanted taxa and removing low abundance/prevalence sequences in QIIME2. diff --git a/modules/local/phyloseq.nf b/modules/local/phyloseq.nf new file mode 100644 index 00000000..6e5923e9 --- /dev/null +++ b/modules/local/phyloseq.nf @@ -0,0 +1,59 @@ +process PHYLOSEQ { + tag "$prefix" + label 'process_low' + + conda "bioconda::bioconductor-phyloseq=1.44.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bioconductor-phyloseq:1.44.0--r43hdfd78af_0' : + 'quay.io/biocontainers/bioconductor-phyloseq:1.44.0--r43hdfd78af_0' }" + + input: + tuple val(prefix), path(tax_tsv) + path otu_tsv + path sam_tsv + path tree + + output: + tuple val(prefix), path("*phyloseq.rds"), emit: rds + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def sam_tsv = "\"${sam_tsv}\"" + def otu_tsv = "\"${otu_tsv}\"" + def tax_tsv = "\"${tax_tsv}\"" + def tree = "\"${tree}\"" + def prefix = "\"${prefix}\"" + """ + #!/usr/bin/env Rscript + + suppressPackageStartupMessages(library(phyloseq)) + + otu_df <- read.table($otu_tsv, sep="\\t", header=TRUE, row.names=1) + tax_df <- read.table($tax_tsv, sep="\\t", header=TRUE, row.names=1) + otu_mat <- as.matrix(otu_df) + tax_mat <- as.matrix(tax_df) + + OTU <- otu_table(otu_mat, taxa_are_rows=TRUE) + TAX <- tax_table(tax_mat) + phy_obj <- phyloseq(OTU, TAX) + + if (file.exists($sam_tsv)) { + sam_df <- read.table($sam_tsv, sep="\\t", header=TRUE, row.names=1) + SAM <- sample_data(sam_df) + phy_obj <- merge_phyloseq(phy_obj, SAM) + } + + if (file.exists($tree)) { + TREE <- read_tree($tree) + phy_obj <- merge_phyloseq(phy_obj, TREE) + } + + saveRDS(phy_obj, file = paste0($prefix, "_phyloseq.rds")) + + # Version information + writeLines(c("\\"${task.process}\\":", paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")),paste0(" phyloseq: ", packageVersion("phyloseq"))), "versions.yml") + """ +} \ No newline at end of file diff --git a/modules/local/phyloseq_inasv.nf b/modules/local/phyloseq_inasv.nf new file mode 100644 index 00000000..f66d1669 --- /dev/null +++ b/modules/local/phyloseq_inasv.nf @@ -0,0 +1,28 @@ +process PHYLOSEQ_INASV { + label 'process_low' + + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + path(biom_file) + + output: + path( "*.tsv" ) , emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + tail $biom_file -n +2 | sed '1s/#OTU ID/ASV_ID/' > reformat_$biom_file + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bash: \$(bash --version | sed -n 1p | sed 's/GNU bash, version //g') + END_VERSIONS + """ +} diff --git a/modules/local/phyloseq_intax.nf b/modules/local/phyloseq_intax.nf new file mode 100644 index 00000000..6dbd8487 --- /dev/null +++ b/modules/local/phyloseq_intax.nf @@ -0,0 +1,29 @@ +process PHYLOSEQ_INTAX { + label 'process_low' + + conda "conda-forge::pandas=1.1.5" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pandas:1.1.5': + 'biocontainers/pandas:1.1.5' }" + + input: + path(tax_tsv) + + output: + path( "*.tsv" ) , emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + reformat_tax_for_phyloseq.py $tax_tsv reformat_$tax_tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version 2>&1 | sed 's/Python //g') + pandas: \$(python -c "import pkg_resources; print(pkg_resources.get_distribution('pandas').version)") + END_VERSIONS + """ +} diff --git a/tests/pipeline/iontorrent.nf.test b/tests/pipeline/iontorrent.nf.test index 9b73af86..a4a16631 100644 --- a/tests/pipeline/iontorrent.nf.test +++ b/tests/pipeline/iontorrent.nf.test @@ -38,7 +38,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet_it_SE_ITS.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/multi.nf.test b/tests/pipeline/multi.nf.test index e4fe28a0..75e2e374 100644 --- a/tests/pipeline/multi.nf.test +++ b/tests/pipeline/multi.nf.test @@ -63,7 +63,8 @@ nextflow_pipeline { { assert new File("$outputDir/qiime2/representative_sequences/filtered-sequences.qza").exists() }, { assert new File("$outputDir/qiime2/representative_sequences/rep-seq.fasta").exists() }, { assert snapshot(path("$outputDir/qiime2/representative_sequences/descriptive_stats.tsv"), - path("$outputDir/qiime2/representative_sequences/seven_number_summary.tsv")).match("qiime2") } + path("$outputDir/qiime2/representative_sequences/seven_number_summary.tsv")).match("qiime2") }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/pacbio_its.nf.test b/tests/pipeline/pacbio_its.nf.test index 39e1d2a2..144db928 100644 --- a/tests/pipeline/pacbio_its.nf.test +++ b/tests/pipeline/pacbio_its.nf.test @@ -52,7 +52,8 @@ nextflow_pipeline { path("$outputDir/SBDI/emof.tsv"), path("$outputDir/SBDI/event.tsv")).match("SBDI") }, { assert new File("$outputDir/SBDI/annotation.tsv").exists() }, - { assert new File("$outputDir/SBDI/asv-table.tsv").exists() } + { assert new File("$outputDir/SBDI/asv-table.tsv").exists() }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/pplace.nf.test b/tests/pipeline/pplace.nf.test index b78c479b..9c4b1806 100644 --- a/tests/pipeline/pplace.nf.test +++ b/tests/pipeline/pplace.nf.test @@ -55,7 +55,9 @@ nextflow_pipeline { { assert new File("$outputDir/pplace/test_pplace.taxonomy.per_query.tsv").exists() }, { assert new File("$outputDir/pplace/test_pplace.graft.test_pplace.epa_result.newick").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/pplace_phyloseq.rds").exists() }, + { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/reftaxcustom.nf.test b/tests/pipeline/reftaxcustom.nf.test index 42e0d104..19035ccb 100644 --- a/tests/pipeline/reftaxcustom.nf.test +++ b/tests/pipeline/reftaxcustom.nf.test @@ -43,7 +43,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/single.nf.test b/tests/pipeline/single.nf.test index be236c9a..1aa634a0 100644 --- a/tests/pipeline/single.nf.test +++ b/tests/pipeline/single.nf.test @@ -44,7 +44,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet_single_end.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/sintax.nf.test b/tests/pipeline/sintax.nf.test index f6de2995..fb0c8c15 100644 --- a/tests/pipeline/sintax.nf.test +++ b/tests/pipeline/sintax.nf.test @@ -65,7 +65,8 @@ nextflow_pipeline { { assert new File("$outputDir/sintax/ASV_tax_sintax.unite-fungi.tsv").exists() }, { assert new File("$outputDir/sintax/ref_taxonomy_sintax.txt").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/test.nf.test b/tests/pipeline/test.nf.test index 7b295941..e8ba0ce0 100644 --- a/tests/pipeline/test.nf.test +++ b/tests/pipeline/test.nf.test @@ -93,7 +93,9 @@ nextflow_pipeline { path("$outputDir/SBDI/emof.tsv"), path("$outputDir/SBDI/event.tsv")).match("SBDI") }, { assert new File("$outputDir/SBDI/annotation.tsv").exists() }, - { assert new File("$outputDir/SBDI/asv-table.tsv").exists() } + { assert new File("$outputDir/SBDI/asv-table.tsv").exists() }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }, + { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() } ) } } diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 03e5bf55..025c8d2a 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -165,6 +165,10 @@ include { QIIME2_INTAX } from '../modules/local/qiime2_intax' include { PICRUST } from '../modules/local/picrust' include { SBDIEXPORT } from '../modules/local/sbdiexport' include { SBDIEXPORTREANNOTATE } from '../modules/local/sbdiexportreannotate' +include { PHYLOSEQ } from '../modules/local/phyloseq' +include { PHYLOSEQ_INASV } from '../modules/local/phyloseq_inasv' +include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_PPLACE } from '../modules/local/phyloseq_intax' +include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_QIIME2 } from '../modules/local/phyloseq_intax' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -456,7 +460,7 @@ workflow AMPLISEQ { } FASTA_NEWICK_EPANG_GAPPA ( ch_pp_data ) ch_versions = ch_versions.mix( FASTA_NEWICK_EPANG_GAPPA.out.versions ) - + ch_pplace_tax = FORMAT_PPLACETAX ( FASTA_NEWICK_EPANG_GAPPA.out.taxonomy_per_query ).tsv } else { ch_pplace_tax = Channel.empty() @@ -477,7 +481,7 @@ workflow AMPLISEQ { ch_qiime_classifier ) ch_versions = ch_versions.mix( QIIME2_TAXONOMY.out.versions.ifEmpty(null) ) //usually a .first() is here, dont know why this leads here to a warning - } + } // // SUBWORKFLOW / MODULES : Downstream analysis with QIIME2 @@ -597,7 +601,7 @@ workflow AMPLISEQ { tax_agglom_max ) } - } + } // // MODULE: Predict functional potential of a bacterial community from marker genes with Picrust2 @@ -627,6 +631,62 @@ workflow AMPLISEQ { ch_versions = ch_versions.mix(SBDIEXPORT.out.versions.first()) } + // + // MODULE: Create phyloseq objects + // + if ( !params.skip_taxonomy ) { + if ( params.metadata ) { + ch_phyloseq_inmeta = ch_metadata.first() // The .first() is to make sure it's a value channel + } else { + ch_phyloseq_inmeta = [] + } + + ch_phyloseq_intax = Channel.empty() + if ( !params.skip_dada_taxonomy ) { + ch_phyloseq_intax = ch_phyloseq_intax.mix ( + ch_dada2_tax.map { it = [ "dada2", file(it) ] } + ) + } + + if ( params.sintax_ref_taxonomy ) { + ch_phyloseq_intax = ch_phyloseq_intax.mix ( + ch_sintax_tax.map { it = [ "sintax", file(it) ] } + ) + } + + if ( params.pplace_tree ) { + ch_phyloseq_intax = ch_phyloseq_intax.mix ( + PHYLOSEQ_INTAX_PPLACE ( + ch_pplace_tax + ).tsv.map { it = [ "pplace", file(it) ] } + ) + + ch_phyloseq_intree = FASTA_NEWICK_EPANG_GAPPA.out.grafted_phylogeny.map { it = it[1] }.first() + } else { + ch_phyloseq_intree = [] + } + + if ( run_qiime2 ) { + ch_phyloseq_intax = ch_phyloseq_intax.mix ( + PHYLOSEQ_INTAX_QIIME2 ( + QIIME2_TAXONOMY.out.tsv + ).tsv.map { it = [ "qiime2", file(it) ] } + ) + + if ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) { + ch_phyloseq_inasv = PHYLOSEQ_INASV ( QIIME2_FILTERTAXA.out.tsv ).tsv + + } else { + ch_phyloseq_inasv = ch_dada2_asv + } + } else { + ch_phyloseq_inasv = ch_dada2_asv + } + + PHYLOSEQ ( ch_phyloseq_intax, ch_phyloseq_inasv, ch_phyloseq_inmeta, ch_phyloseq_intree ) + ch_versions = ch_versions.mix(PHYLOSEQ.out.versions.first()) + } + CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) From 44879f157bc14ab4231a28b613583efb35d83afd Mon Sep 17 00:00:00 2001 From: Adam Bennett Date: Tue, 8 Aug 2023 11:21:33 +0800 Subject: [PATCH 2/9] updated README.md to mention phyloseq object creation --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 56e499a3..8344d847 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ By default, the pipeline currently performs the following: - Taxonomical classification using DADA2, [SINTAX](https://doi.org/10.1101/074161) or [QIIME2](https://www.nature.com/articles/s41587-019-0209-9) - Excludes unwanted taxa, produces absolute and relative feature/taxa count tables and plots, plots alpha rarefaction curves, computes alpha and beta diversity indices and plots thereof ([QIIME2](https://www.nature.com/articles/s41587-019-0209-9)) - Calls differentially abundant taxa ([ANCOM](https://www.ncbi.nlm.nih.gov/pubmed/26028277)) +- Creates phyloseq R objects ([Phyloseq](https://www.bioconductor.org/packages/release/bioc/html/phyloseq.html)) - Overall pipeline run summaries ([MultiQC](https://multiqc.info/)) ## Usage From 51c2fcf44a9bd76482557926554beb1b09083925 Mon Sep 17 00:00:00 2001 From: Adam Bennett <43841526+a4000@users.noreply.github.com> Date: Tue, 8 Aug 2023 14:15:45 +0800 Subject: [PATCH 3/9] Update workflows/ampliseq.nf Co-authored-by: Daniel Lundin --- workflows/ampliseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 025c8d2a..9d7d58ad 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -601,7 +601,7 @@ workflow AMPLISEQ { tax_agglom_max ) } - } + } // // MODULE: Predict functional potential of a bacterial community from marker genes with Picrust2 From 9e2f218802c19e17a440ff5736f8d4e13ff492e8 Mon Sep 17 00:00:00 2001 From: Adam Bennett <43841526+a4000@users.noreply.github.com> Date: Tue, 8 Aug 2023 14:16:02 +0800 Subject: [PATCH 4/9] Update workflows/ampliseq.nf Co-authored-by: Daniel Lundin --- workflows/ampliseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 9d7d58ad..1ed6d159 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -481,7 +481,7 @@ workflow AMPLISEQ { ch_qiime_classifier ) ch_versions = ch_versions.mix( QIIME2_TAXONOMY.out.versions.ifEmpty(null) ) //usually a .first() is here, dont know why this leads here to a warning - } + } // // SUBWORKFLOW / MODULES : Downstream analysis with QIIME2 From efbb04323f585a8d9611d34797fee66a6b598344 Mon Sep 17 00:00:00 2001 From: Adam Bennett <43841526+a4000@users.noreply.github.com> Date: Tue, 8 Aug 2023 14:16:15 +0800 Subject: [PATCH 5/9] Update workflows/ampliseq.nf Co-authored-by: Daniel Lundin --- workflows/ampliseq.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 1ed6d159..a0b0a9f7 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -460,7 +460,6 @@ workflow AMPLISEQ { } FASTA_NEWICK_EPANG_GAPPA ( ch_pp_data ) ch_versions = ch_versions.mix( FASTA_NEWICK_EPANG_GAPPA.out.versions ) - ch_pplace_tax = FORMAT_PPLACETAX ( FASTA_NEWICK_EPANG_GAPPA.out.taxonomy_per_query ).tsv } else { ch_pplace_tax = Channel.empty() From 908f6554c1337eb9fe0e7dab0246fae27aad188b Mon Sep 17 00:00:00 2001 From: Adam Bennett <43841526+a4000@users.noreply.github.com> Date: Tue, 8 Aug 2023 14:16:26 +0800 Subject: [PATCH 6/9] Update modules/local/phyloseq.nf Co-authored-by: Daniel Lundin --- modules/local/phyloseq.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/phyloseq.nf b/modules/local/phyloseq.nf index 6e5923e9..3df9c6db 100644 --- a/modules/local/phyloseq.nf +++ b/modules/local/phyloseq.nf @@ -24,8 +24,8 @@ process PHYLOSEQ { def sam_tsv = "\"${sam_tsv}\"" def otu_tsv = "\"${otu_tsv}\"" def tax_tsv = "\"${tax_tsv}\"" - def tree = "\"${tree}\"" - def prefix = "\"${prefix}\"" + def tree = "\"${tree}\"" + def prefix = "\"${prefix}\"" """ #!/usr/bin/env Rscript From 01212b6da54f3101efe9c1fc0eef97b00351e144 Mon Sep 17 00:00:00 2001 From: Adam Bennett Date: Wed, 9 Aug 2023 09:23:50 +0800 Subject: [PATCH 7/9] added phyloseq workflow --- modules/local/phyloseq.nf | 6 +- subworkflows/local/phyloseq_workflow.nf | 75 +++++++++++++++++++++++++ tests/pipeline/iontorrent.nf.test | 3 +- tests/pipeline/multi.nf.test | 3 +- tests/pipeline/pacbio_its.nf.test | 3 +- tests/pipeline/pplace.nf.test | 4 +- tests/pipeline/reftaxcustom.nf.test | 3 +- tests/pipeline/single.nf.test | 3 +- tests/pipeline/sintax.nf.test | 3 +- tests/pipeline/test.nf.test | 4 +- workflows/ampliseq.nf | 73 +++++++----------------- 11 files changed, 109 insertions(+), 71 deletions(-) create mode 100644 subworkflows/local/phyloseq_workflow.nf diff --git a/modules/local/phyloseq.nf b/modules/local/phyloseq.nf index 3df9c6db..4ede387d 100644 --- a/modules/local/phyloseq.nf +++ b/modules/local/phyloseq.nf @@ -54,6 +54,10 @@ process PHYLOSEQ { saveRDS(phy_obj, file = paste0($prefix, "_phyloseq.rds")) # Version information - writeLines(c("\\"${task.process}\\":", paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")),paste0(" phyloseq: ", packageVersion("phyloseq"))), "versions.yml") + writeLines(c("\\"${task.process}\\":", + paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")), + paste0(" phyloseq: ", packageVersion("phyloseq"))), + "versions.yml" + ) """ } \ No newline at end of file diff --git a/subworkflows/local/phyloseq_workflow.nf b/subworkflows/local/phyloseq_workflow.nf new file mode 100644 index 00000000..406f7756 --- /dev/null +++ b/subworkflows/local/phyloseq_workflow.nf @@ -0,0 +1,75 @@ +/* + * Create phyloseq objects + */ + +include { PHYLOSEQ } from '../../modules/local/phyloseq' +include { PHYLOSEQ_INASV } from '../../modules/local/phyloseq_inasv' +include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_PPLACE } from '../../modules/local/phyloseq_intax' +include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_QIIME2 } from '../../modules/local/phyloseq_intax' + +workflow PHYLOSEQ_WORKFLOW { + take: + ch_dada2_tax + ch_sintax_tax + ch_pplace_tax + ch_qiime2_tax + ch_tsv + ch_meta + ch_tree + run_qiime2 + + main: + if ( params.metadata ) { + ch_phyloseq_inmeta = ch_meta.first() // The .first() is to make sure it's a value channel + } else { + ch_phyloseq_inmeta = [] + } + + ch_phyloseq_intax = Channel.empty() + if ( !params.skip_dada_taxonomy ) { + ch_phyloseq_intax = ch_phyloseq_intax.mix ( + ch_dada2_tax.map { it = [ "dada2", file(it) ] } + ) + } + + if ( params.sintax_ref_taxonomy ) { + ch_phyloseq_intax = ch_phyloseq_intax.mix ( + ch_sintax_tax.map { it = [ "sintax", file(it) ] } + ) + } + + if ( params.pplace_tree ) { + ch_phyloseq_intax = ch_phyloseq_intax.mix ( + PHYLOSEQ_INTAX_PPLACE ( + ch_pplace_tax + ).tsv.map { it = [ "pplace", file(it) ] } + ) + + ch_phyloseq_intree = ch_tree.map { it = it[1] }.first() + } else { + ch_phyloseq_intree = [] + } + + if ( run_qiime2 ) { + ch_phyloseq_intax = ch_phyloseq_intax.mix ( + PHYLOSEQ_INTAX_QIIME2 ( + ch_qiime2_tax + ).tsv.map { it = [ "qiime2", file(it) ] } + ) + + if ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) { + ch_phyloseq_inasv = PHYLOSEQ_INASV ( ch_tsv ).tsv + + } else { + ch_phyloseq_inasv = ch_tsv + } + } else { + ch_phyloseq_inasv = ch_tsv + } + + PHYLOSEQ ( ch_phyloseq_intax, ch_phyloseq_inasv, ch_phyloseq_inmeta, ch_phyloseq_intree ) + + emit: + rds = PHYLOSEQ.out.rds + versions= PHYLOSEQ.out.versions +} diff --git a/tests/pipeline/iontorrent.nf.test b/tests/pipeline/iontorrent.nf.test index a4a16631..9b73af86 100644 --- a/tests/pipeline/iontorrent.nf.test +++ b/tests/pipeline/iontorrent.nf.test @@ -38,8 +38,7 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet_it_SE_ITS.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } ) } } diff --git a/tests/pipeline/multi.nf.test b/tests/pipeline/multi.nf.test index 75e2e374..e4fe28a0 100644 --- a/tests/pipeline/multi.nf.test +++ b/tests/pipeline/multi.nf.test @@ -63,8 +63,7 @@ nextflow_pipeline { { assert new File("$outputDir/qiime2/representative_sequences/filtered-sequences.qza").exists() }, { assert new File("$outputDir/qiime2/representative_sequences/rep-seq.fasta").exists() }, { assert snapshot(path("$outputDir/qiime2/representative_sequences/descriptive_stats.tsv"), - path("$outputDir/qiime2/representative_sequences/seven_number_summary.tsv")).match("qiime2") }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + path("$outputDir/qiime2/representative_sequences/seven_number_summary.tsv")).match("qiime2") } ) } } diff --git a/tests/pipeline/pacbio_its.nf.test b/tests/pipeline/pacbio_its.nf.test index 144db928..39e1d2a2 100644 --- a/tests/pipeline/pacbio_its.nf.test +++ b/tests/pipeline/pacbio_its.nf.test @@ -52,8 +52,7 @@ nextflow_pipeline { path("$outputDir/SBDI/emof.tsv"), path("$outputDir/SBDI/event.tsv")).match("SBDI") }, { assert new File("$outputDir/SBDI/annotation.tsv").exists() }, - { assert new File("$outputDir/SBDI/asv-table.tsv").exists() }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + { assert new File("$outputDir/SBDI/asv-table.tsv").exists() } ) } } diff --git a/tests/pipeline/pplace.nf.test b/tests/pipeline/pplace.nf.test index 9c4b1806..b78c479b 100644 --- a/tests/pipeline/pplace.nf.test +++ b/tests/pipeline/pplace.nf.test @@ -55,9 +55,7 @@ nextflow_pipeline { { assert new File("$outputDir/pplace/test_pplace.taxonomy.per_query.tsv").exists() }, { assert new File("$outputDir/pplace/test_pplace.graft.test_pplace.epa_result.newick").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/phyloseq/pplace_phyloseq.rds").exists() }, - { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } ) } } diff --git a/tests/pipeline/reftaxcustom.nf.test b/tests/pipeline/reftaxcustom.nf.test index 19035ccb..42e0d104 100644 --- a/tests/pipeline/reftaxcustom.nf.test +++ b/tests/pipeline/reftaxcustom.nf.test @@ -43,8 +43,7 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } ) } } diff --git a/tests/pipeline/single.nf.test b/tests/pipeline/single.nf.test index 1aa634a0..be236c9a 100644 --- a/tests/pipeline/single.nf.test +++ b/tests/pipeline/single.nf.test @@ -44,8 +44,7 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet_single_end.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } ) } } diff --git a/tests/pipeline/sintax.nf.test b/tests/pipeline/sintax.nf.test index fb0c8c15..f6de2995 100644 --- a/tests/pipeline/sintax.nf.test +++ b/tests/pipeline/sintax.nf.test @@ -65,8 +65,7 @@ nextflow_pipeline { { assert new File("$outputDir/sintax/ASV_tax_sintax.unite-fungi.tsv").exists() }, { assert new File("$outputDir/sintax/ref_taxonomy_sintax.txt").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } ) } } diff --git a/tests/pipeline/test.nf.test b/tests/pipeline/test.nf.test index e8ba0ce0..7b295941 100644 --- a/tests/pipeline/test.nf.test +++ b/tests/pipeline/test.nf.test @@ -93,9 +93,7 @@ nextflow_pipeline { path("$outputDir/SBDI/emof.tsv"), path("$outputDir/SBDI/event.tsv")).match("SBDI") }, { assert new File("$outputDir/SBDI/annotation.tsv").exists() }, - { assert new File("$outputDir/SBDI/asv-table.tsv").exists() }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }, - { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() } + { assert new File("$outputDir/SBDI/asv-table.tsv").exists() } ) } } diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index a0b0a9f7..6608b86d 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -165,10 +165,6 @@ include { QIIME2_INTAX } from '../modules/local/qiime2_intax' include { PICRUST } from '../modules/local/picrust' include { SBDIEXPORT } from '../modules/local/sbdiexport' include { SBDIEXPORTREANNOTATE } from '../modules/local/sbdiexportreannotate' -include { PHYLOSEQ } from '../modules/local/phyloseq' -include { PHYLOSEQ_INASV } from '../modules/local/phyloseq_inasv' -include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_PPLACE } from '../modules/local/phyloseq_intax' -include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_QIIME2 } from '../modules/local/phyloseq_intax' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -185,6 +181,7 @@ include { QIIME2_EXPORT } from '../subworkflows/local/qiime2_exp include { QIIME2_BARPLOTAVG } from '../subworkflows/local/qiime2_barplotavg' include { QIIME2_DIVERSITY } from '../subworkflows/local/qiime2_diversity' include { QIIME2_ANCOM } from '../subworkflows/local/qiime2_ancom' +include { PHYLOSEQ_WORKFLOW } from '../subworkflows/local/phyloseq_workflow' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -480,6 +477,9 @@ workflow AMPLISEQ { ch_qiime_classifier ) ch_versions = ch_versions.mix( QIIME2_TAXONOMY.out.versions.ifEmpty(null) ) //usually a .first() is here, dont know why this leads here to a warning + ch_qiime2_tax = QIIME2_TAXONOMY.out.tsv + } else { + ch_qiime2_tax = Channel.empty() } // @@ -543,7 +543,7 @@ workflow AMPLISEQ { } //Export various ASV tables if (!params.skip_abundance_tables) { - QIIME2_EXPORT ( ch_asv, ch_seq, ch_tax, QIIME2_TAXONOMY.out.tsv, ch_dada2_tax, ch_pplace_tax, ch_sintax_tax, tax_agglom_min, tax_agglom_max ) + QIIME2_EXPORT ( ch_asv, ch_seq, ch_tax, ch_qiime2_tax, ch_dada2_tax, ch_pplace_tax, ch_sintax_tax, tax_agglom_min, tax_agglom_max ) } if (!params.skip_barplot) { @@ -600,6 +600,8 @@ workflow AMPLISEQ { tax_agglom_max ) } + } else { + ch_tsv = ch_dada2_asv } // @@ -631,59 +633,26 @@ workflow AMPLISEQ { } // - // MODULE: Create phyloseq objects + // SUBWORKFLOW: Create phyloseq objects // if ( !params.skip_taxonomy ) { - if ( params.metadata ) { - ch_phyloseq_inmeta = ch_metadata.first() // The .first() is to make sure it's a value channel - } else { - ch_phyloseq_inmeta = [] - } - - ch_phyloseq_intax = Channel.empty() - if ( !params.skip_dada_taxonomy ) { - ch_phyloseq_intax = ch_phyloseq_intax.mix ( - ch_dada2_tax.map { it = [ "dada2", file(it) ] } - ) - } - - if ( params.sintax_ref_taxonomy ) { - ch_phyloseq_intax = ch_phyloseq_intax.mix ( - ch_sintax_tax.map { it = [ "sintax", file(it) ] } - ) - } - if ( params.pplace_tree ) { - ch_phyloseq_intax = ch_phyloseq_intax.mix ( - PHYLOSEQ_INTAX_PPLACE ( - ch_pplace_tax - ).tsv.map { it = [ "pplace", file(it) ] } - ) - - ch_phyloseq_intree = FASTA_NEWICK_EPANG_GAPPA.out.grafted_phylogeny.map { it = it[1] }.first() + ch_tree_for_phyloseq = FASTA_NEWICK_EPANG_GAPPA.out.grafted_phylogeny } else { - ch_phyloseq_intree = [] + ch_tree_for_phyloseq = [] } - - if ( run_qiime2 ) { - ch_phyloseq_intax = ch_phyloseq_intax.mix ( - PHYLOSEQ_INTAX_QIIME2 ( - QIIME2_TAXONOMY.out.tsv - ).tsv.map { it = [ "qiime2", file(it) ] } - ) - - if ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) { - ch_phyloseq_inasv = PHYLOSEQ_INASV ( QIIME2_FILTERTAXA.out.tsv ).tsv - } else { - ch_phyloseq_inasv = ch_dada2_asv - } - } else { - ch_phyloseq_inasv = ch_dada2_asv - } - - PHYLOSEQ ( ch_phyloseq_intax, ch_phyloseq_inasv, ch_phyloseq_inmeta, ch_phyloseq_intree ) - ch_versions = ch_versions.mix(PHYLOSEQ.out.versions.first()) + PHYLOSEQ_WORKFLOW ( + ch_dada2_tax.ifEmpty([]), + ch_sintax_tax.ifEmpty([]), + ch_pplace_tax.ifEmpty([]), + ch_qiime2_tax.ifEmpty([]), + ch_tsv, + ch_metadata.ifEmpty([]), + ch_tree_for_phyloseq, + run_qiime2 + ) + ch_versions = ch_versions.mix(PHYLOSEQ_WORKFLOW.out.versions.first()) } CUSTOM_DUMPSOFTWAREVERSIONS ( From 29c47b6219863fba1516285133895bac3e83a7e3 Mon Sep 17 00:00:00 2001 From: Adam Bennett Date: Thu, 10 Aug 2023 14:54:26 +0800 Subject: [PATCH 8/9] modified test files for phyloseq and fixed a bug with phyloseq workflow that happens in some test profiles --- bin/reformat_tax_for_phyloseq.py | 6 +-- modules/local/phyloseq.nf | 14 +++---- subworkflows/local/phyloseq_workflow.nf | 49 +++++------------------- tests/pipeline/iontorrent.nf.test | 3 +- tests/pipeline/iontorrent.nf.test.snap | 2 +- tests/pipeline/multi.nf.test | 3 +- tests/pipeline/multi.nf.test.snap | 2 +- tests/pipeline/pacbio_its.nf.test | 3 +- tests/pipeline/pacbio_its.nf.test.snap | 2 +- tests/pipeline/pplace.nf.test | 4 +- tests/pipeline/pplace.nf.test.snap | 2 +- tests/pipeline/reftaxcustom.nf.test | 3 +- tests/pipeline/reftaxcustom.nf.test.snap | 2 +- tests/pipeline/single.nf.test | 4 +- tests/pipeline/single.nf.test.snap | 2 +- tests/pipeline/sintax.nf.test | 3 +- tests/pipeline/sintax.nf.test.snap | 2 +- tests/pipeline/test.nf.test | 4 +- tests/pipeline/test.nf.test.snap | 2 +- workflows/ampliseq.nf | 16 +++++--- 20 files changed, 57 insertions(+), 71 deletions(-) diff --git a/bin/reformat_tax_for_phyloseq.py b/bin/reformat_tax_for_phyloseq.py index 9a3281fb..f35aaf03 100755 --- a/bin/reformat_tax_for_phyloseq.py +++ b/bin/reformat_tax_for_phyloseq.py @@ -13,10 +13,10 @@ tax_col = tax_df.columns[1] # Split the values in the tax column -split_tax = tax_df[tax_col].str.split(';', expand=True) +split_tax = tax_df[tax_col].str.split(";", expand=True) # Assign names to the new columns with an auto incrementing integer -new_col_names = [f'{tax_col}_{i+1}' for i in range(split_tax.shape[1])] +new_col_names = [f"{tax_col}_{i+1}" for i in range(split_tax.shape[1])] split_tax.columns = new_col_names # Strip whitespace from the tax names @@ -29,4 +29,4 @@ result = pd.concat([tax_df, split_tax], axis=1) # Create new tsv file -result.to_csv(out_file, sep='\t', index=False) +result.to_csv(out_file, sep="\t", index=False) diff --git a/modules/local/phyloseq.nf b/modules/local/phyloseq.nf index 4ede387d..54537213 100644 --- a/modules/local/phyloseq.nf +++ b/modules/local/phyloseq.nf @@ -1,7 +1,7 @@ process PHYLOSEQ { tag "$prefix" label 'process_low' - + conda "bioconda::bioconductor-phyloseq=1.44.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bioconductor-phyloseq:1.44.0--r43hdfd78af_0' : @@ -9,10 +9,10 @@ process PHYLOSEQ { input: tuple val(prefix), path(tax_tsv) - path otu_tsv + path otu_tsv path sam_tsv path tree - + output: tuple val(prefix), path("*phyloseq.rds"), emit: rds path "versions.yml" , emit: versions @@ -52,12 +52,12 @@ process PHYLOSEQ { } saveRDS(phy_obj, file = paste0($prefix, "_phyloseq.rds")) - + # Version information - writeLines(c("\\"${task.process}\\":", + writeLines(c("\\"${task.process}\\":", paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")), - paste0(" phyloseq: ", packageVersion("phyloseq"))), + paste0(" phyloseq: ", packageVersion("phyloseq"))), "versions.yml" ) """ -} \ No newline at end of file +} diff --git a/subworkflows/local/phyloseq_workflow.nf b/subworkflows/local/phyloseq_workflow.nf index 406f7756..adf208b7 100644 --- a/subworkflows/local/phyloseq_workflow.nf +++ b/subworkflows/local/phyloseq_workflow.nf @@ -4,70 +4,39 @@ include { PHYLOSEQ } from '../../modules/local/phyloseq' include { PHYLOSEQ_INASV } from '../../modules/local/phyloseq_inasv' -include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_PPLACE } from '../../modules/local/phyloseq_intax' -include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_QIIME2 } from '../../modules/local/phyloseq_intax' workflow PHYLOSEQ_WORKFLOW { take: - ch_dada2_tax - ch_sintax_tax - ch_pplace_tax - ch_qiime2_tax + ch_tax ch_tsv ch_meta ch_tree run_qiime2 - + main: if ( params.metadata ) { ch_phyloseq_inmeta = ch_meta.first() // The .first() is to make sure it's a value channel - } else { - ch_phyloseq_inmeta = [] - } - - ch_phyloseq_intax = Channel.empty() - if ( !params.skip_dada_taxonomy ) { - ch_phyloseq_intax = ch_phyloseq_intax.mix ( - ch_dada2_tax.map { it = [ "dada2", file(it) ] } - ) - } - - if ( params.sintax_ref_taxonomy ) { - ch_phyloseq_intax = ch_phyloseq_intax.mix ( - ch_sintax_tax.map { it = [ "sintax", file(it) ] } - ) + } else { + ch_phyloseq_inmeta = [] } if ( params.pplace_tree ) { - ch_phyloseq_intax = ch_phyloseq_intax.mix ( - PHYLOSEQ_INTAX_PPLACE ( - ch_pplace_tax - ).tsv.map { it = [ "pplace", file(it) ] } - ) - ch_phyloseq_intree = ch_tree.map { it = it[1] }.first() } else { ch_phyloseq_intree = [] } - - if ( run_qiime2 ) { - ch_phyloseq_intax = ch_phyloseq_intax.mix ( - PHYLOSEQ_INTAX_QIIME2 ( - ch_qiime2_tax - ).tsv.map { it = [ "qiime2", file(it) ] } - ) + if ( run_qiime2 ) { if ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) { ch_phyloseq_inasv = PHYLOSEQ_INASV ( ch_tsv ).tsv - - } else { - ch_phyloseq_inasv = ch_tsv + } else { + ch_phyloseq_inasv = ch_tsv } - } else { + } else { ch_phyloseq_inasv = ch_tsv } - PHYLOSEQ ( ch_phyloseq_intax, ch_phyloseq_inasv, ch_phyloseq_inmeta, ch_phyloseq_intree ) + PHYLOSEQ ( ch_tax, ch_phyloseq_inasv, ch_phyloseq_inmeta, ch_phyloseq_intree ) emit: rds = PHYLOSEQ.out.rds diff --git a/tests/pipeline/iontorrent.nf.test b/tests/pipeline/iontorrent.nf.test index 9b73af86..a4a16631 100644 --- a/tests/pipeline/iontorrent.nf.test +++ b/tests/pipeline/iontorrent.nf.test @@ -38,7 +38,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet_it_SE_ITS.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/iontorrent.nf.test.snap b/tests/pipeline/iontorrent.nf.test.snap index c9c8f4bb..c7fbfb89 100644 --- a/tests/pipeline/iontorrent.nf.test.snap +++ b/tests/pipeline/iontorrent.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-06-20T01:42:35+0000" }, diff --git a/tests/pipeline/multi.nf.test b/tests/pipeline/multi.nf.test index e4fe28a0..75e2e374 100644 --- a/tests/pipeline/multi.nf.test +++ b/tests/pipeline/multi.nf.test @@ -63,7 +63,8 @@ nextflow_pipeline { { assert new File("$outputDir/qiime2/representative_sequences/filtered-sequences.qza").exists() }, { assert new File("$outputDir/qiime2/representative_sequences/rep-seq.fasta").exists() }, { assert snapshot(path("$outputDir/qiime2/representative_sequences/descriptive_stats.tsv"), - path("$outputDir/qiime2/representative_sequences/seven_number_summary.tsv")).match("qiime2") } + path("$outputDir/qiime2/representative_sequences/seven_number_summary.tsv")).match("qiime2") }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/multi.nf.test.snap b/tests/pipeline/multi.nf.test.snap index 2f0095ac..25b1437c 100644 --- a/tests/pipeline/multi.nf.test.snap +++ b/tests/pipeline/multi.nf.test.snap @@ -14,7 +14,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-05-28T21:15:03+0000" }, diff --git a/tests/pipeline/pacbio_its.nf.test b/tests/pipeline/pacbio_its.nf.test index 39e1d2a2..144db928 100644 --- a/tests/pipeline/pacbio_its.nf.test +++ b/tests/pipeline/pacbio_its.nf.test @@ -52,7 +52,8 @@ nextflow_pipeline { path("$outputDir/SBDI/emof.tsv"), path("$outputDir/SBDI/event.tsv")).match("SBDI") }, { assert new File("$outputDir/SBDI/annotation.tsv").exists() }, - { assert new File("$outputDir/SBDI/asv-table.tsv").exists() } + { assert new File("$outputDir/SBDI/asv-table.tsv").exists() }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/pacbio_its.nf.test.snap b/tests/pipeline/pacbio_its.nf.test.snap index 3c860a89..775e5195 100644 --- a/tests/pipeline/pacbio_its.nf.test.snap +++ b/tests/pipeline/pacbio_its.nf.test.snap @@ -35,7 +35,7 @@ }, "software_versions": { "content": [ - "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" + "{ASSIGNSH={pandas=1.1.5, python=3.9.1}, BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FORMAT_TAXRESULTS_STD={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_USEARCHGLOBAL={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-06-20T02:07:02+0000" }, diff --git a/tests/pipeline/pplace.nf.test b/tests/pipeline/pplace.nf.test index b78c479b..956e88b3 100644 --- a/tests/pipeline/pplace.nf.test +++ b/tests/pipeline/pplace.nf.test @@ -55,7 +55,9 @@ nextflow_pipeline { { assert new File("$outputDir/pplace/test_pplace.taxonomy.per_query.tsv").exists() }, { assert new File("$outputDir/pplace/test_pplace.graft.test_pplace.epa_result.newick").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() }, + { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/pplace.nf.test.snap b/tests/pipeline/pplace.nf.test.snap index d0aa5f26..9ee79d29 100644 --- a/tests/pipeline/pplace.nf.test.snap +++ b/tests/pipeline/pplace.nf.test.snap @@ -8,7 +8,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, EPANG_PLACE={epang=0.3.8}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, GAPPA_ASSIGN={gappa=0.8.0}, GAPPA_GRAFT={gappa=0.8.0}, GAPPA_HEATTREE={gappa=0.8.0}, HMMER_AFAFORMATQUERY={hmmer/easel=0.48}, HMMER_AFAFORMATREF={hmmer/easel=0.48}, HMMER_HMMALIGNQUERY={hmmer=3.3.2}, HMMER_HMMALIGNREF={hmmer=3.3.2}, HMMER_HMMBUILD={hmmer=3.3.2}, HMMER_MASKQUERY={hmmer/easel=0.48}, HMMER_MASKREF={hmmer/easel=0.48}, HMMER_UNALIGNREF={hmmer/easel=0.48}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-06-20T17:24:03+0000" }, diff --git a/tests/pipeline/reftaxcustom.nf.test b/tests/pipeline/reftaxcustom.nf.test index 42e0d104..19035ccb 100644 --- a/tests/pipeline/reftaxcustom.nf.test +++ b/tests/pipeline/reftaxcustom.nf.test @@ -43,7 +43,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/reftaxcustom.nf.test.snap b/tests/pipeline/reftaxcustom.nf.test.snap index 6407a3bf..7b33f261 100644 --- a/tests/pipeline/reftaxcustom.nf.test.snap +++ b/tests/pipeline/reftaxcustom.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-05-28T21:18:54+0000" }, diff --git a/tests/pipeline/single.nf.test b/tests/pipeline/single.nf.test index be236c9a..08ddeca2 100644 --- a/tests/pipeline/single.nf.test +++ b/tests/pipeline/single.nf.test @@ -44,7 +44,9 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet_single_end.tsv")).match("input") }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }, + { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/single.nf.test.snap b/tests/pipeline/single.nf.test.snap index 49d65106..bd9096d0 100644 --- a/tests/pipeline/single.nf.test.snap +++ b/tests/pipeline/single.nf.test.snap @@ -13,7 +13,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, RENAME_RAW_DATA_FILES={sed=4.7}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-05-28T20:35:33+0000" }, diff --git a/tests/pipeline/sintax.nf.test b/tests/pipeline/sintax.nf.test index f6de2995..fb0c8c15 100644 --- a/tests/pipeline/sintax.nf.test +++ b/tests/pipeline/sintax.nf.test @@ -65,7 +65,8 @@ nextflow_pipeline { { assert new File("$outputDir/sintax/ASV_tax_sintax.unite-fungi.tsv").exists() }, { assert new File("$outputDir/sintax/ref_taxonomy_sintax.txt").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), - path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") } + path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, + { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/sintax.nf.test.snap b/tests/pipeline/sintax.nf.test.snap index c9745541..5f360a4b 100644 --- a/tests/pipeline/sintax.nf.test.snap +++ b/tests/pipeline/sintax.nf.test.snap @@ -16,7 +16,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, ITSX_CUTASV={ITSx=1.1.3}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, VSEARCH_SINTAX={vsearch=2.21.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-06-20T16:40:18+0000" }, diff --git a/tests/pipeline/test.nf.test b/tests/pipeline/test.nf.test index 7b295941..e8ba0ce0 100644 --- a/tests/pipeline/test.nf.test +++ b/tests/pipeline/test.nf.test @@ -93,7 +93,9 @@ nextflow_pipeline { path("$outputDir/SBDI/emof.tsv"), path("$outputDir/SBDI/event.tsv")).match("SBDI") }, { assert new File("$outputDir/SBDI/annotation.tsv").exists() }, - { assert new File("$outputDir/SBDI/asv-table.tsv").exists() } + { assert new File("$outputDir/SBDI/asv-table.tsv").exists() }, + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }, + { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/test.nf.test.snap b/tests/pipeline/test.nf.test.snap index fdf84093..b345de55 100644 --- a/tests/pipeline/test.nf.test.snap +++ b/tests/pipeline/test.nf.test.snap @@ -22,7 +22,7 @@ }, "software_versions": { "content": [ - "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" + "{BARRNAP={barrnap=0.9}, CUSTOM_DUMPSOFTWAREVERSIONS={python=3.11.0, yaml=6.0}, CUTADAPT_BASIC={cutadapt=3.4}, DADA2_DENOISING={R=4.1.1, dada2=1.22.0}, DADA2_FILTNTRIM={R=4.1.1, dada2=1.22.0}, DADA2_QUALITY1={R=4.1.1, ShortRead=1.52.0, dada2=1.22.0}, DADA2_TAXONOMY={R=4.1.1, dada2=1.22.0}, FASTQC={fastqc=0.11.9}, FILTER_LEN_ASV={Biostrings=2.58.0, R=4.0.3}, FILTER_STATS={pandas=1.1.5, python=3.9.1}, PHYLOSEQ={R=4.3.0, phyloseq=1.44.0}, QIIME2_INSEQ={qiime2=2022.11.1}, RENAME_RAW_DATA_FILES={sed=4.7}, SBDIEXPORT={R=3.6.3}, TRUNCLEN={pandas=1.1.5, python=3.9.1}, Workflow={nf-core/ampliseq=2.7.0dev}}" ], "timestamp": "2023-05-28T20:55:32+0000" }, diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 3ef61dc1..04255de8 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -123,6 +123,9 @@ if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) if ( workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 ) { log.warn "Conda or mamba is enabled, any steps involving QIIME2 are not available. Use a container engine instead of conda to enable all software." } } +// This tracks tax tables produced during pipeline and each table will be used during phyloseq +ch_tax_for_phyloseq = Channel.empty() + /* ======================================================================================== @@ -163,6 +166,8 @@ include { QIIME2_INTAX } from '../modules/local/qiime2_intax' include { PICRUST } from '../modules/local/picrust' include { SBDIEXPORT } from '../modules/local/sbdiexport' include { SBDIEXPORTREANNOTATE } from '../modules/local/sbdiexportreannotate' +include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_PPLACE } from '../modules/local/phyloseq_intax' +include { PHYLOSEQ_INTAX as PHYLOSEQ_INTAX_QIIME2 } from '../modules/local/phyloseq_intax' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -424,6 +429,7 @@ workflow AMPLISEQ { taxlevels ).tax.set { ch_dada2_tax } ch_versions = ch_versions.mix(DADA2_TAXONOMY_WF.out.versions) + ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_dada2_tax.map { it = [ "dada2", file(it) ] } ) } else { ch_dada2_tax = Channel.empty() } @@ -438,6 +444,7 @@ workflow AMPLISEQ { sintax_taxlevels ).tax.set { ch_sintax_tax } ch_versions = ch_versions.mix(SINTAX_TAXONOMY_WF.out.versions) + ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_sintax_tax.map { it = [ "sintax", file(it) ] } ) } else { ch_sintax_tax = Channel.empty() } @@ -459,6 +466,7 @@ workflow AMPLISEQ { FASTA_NEWICK_EPANG_GAPPA ( ch_pp_data ) ch_versions = ch_versions.mix( FASTA_NEWICK_EPANG_GAPPA.out.versions ) ch_pplace_tax = FORMAT_PPLACETAX ( FASTA_NEWICK_EPANG_GAPPA.out.taxonomy_per_query ).tsv + ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_PPLACE ( ch_pplace_tax ).tsv.map { it = [ "pplace", file(it) ] } ) } else { ch_pplace_tax = Channel.empty() } @@ -479,6 +487,7 @@ workflow AMPLISEQ { ) ch_versions = ch_versions.mix( QIIME2_TAXONOMY.out.versions.ifEmpty(null) ) //usually a .first() is here, dont know why this leads here to a warning ch_qiime2_tax = QIIME2_TAXONOMY.out.tsv + ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_QIIME2 ( ch_qiime2_tax ).tsv.map { it = [ "qiime2", file(it) ] } ) } else { ch_qiime2_tax = Channel.empty() } @@ -643,11 +652,8 @@ workflow AMPLISEQ { ch_tree_for_phyloseq = [] } - PHYLOSEQ_WORKFLOW ( - ch_dada2_tax.ifEmpty([]), - ch_sintax_tax.ifEmpty([]), - ch_pplace_tax.ifEmpty([]), - ch_qiime2_tax.ifEmpty([]), + PHYLOSEQ_WORKFLOW ( + ch_tax_for_phyloseq, ch_tsv, ch_metadata.ifEmpty([]), ch_tree_for_phyloseq, From 4baba8147f6ac30d39b5879fa4bd8fb19c3689b7 Mon Sep 17 00:00:00 2001 From: Adam Bennett Date: Thu, 10 Aug 2023 16:19:53 +0800 Subject: [PATCH 9/9] fixed file checks --- tests/pipeline/pplace.nf.test | 3 +-- tests/pipeline/single.nf.test | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/pipeline/pplace.nf.test b/tests/pipeline/pplace.nf.test index 956e88b3..d348bee8 100644 --- a/tests/pipeline/pplace.nf.test +++ b/tests/pipeline/pplace.nf.test @@ -56,8 +56,7 @@ nextflow_pipeline { { assert new File("$outputDir/pplace/test_pplace.graft.test_pplace.epa_result.newick").exists() }, { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() }, - { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() } + { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() } ) } } diff --git a/tests/pipeline/single.nf.test b/tests/pipeline/single.nf.test index 08ddeca2..1aa634a0 100644 --- a/tests/pipeline/single.nf.test +++ b/tests/pipeline/single.nf.test @@ -45,8 +45,7 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }, - { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() } + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } ) } }