re-arrange param documentation and rename --input_multiregion to --multiregion
d4straub committed Mar 18, 2024
1 parent 8c711a7 commit 77e03b1
Showing 7 changed files with 58 additions and 48 deletions.
6 changes: 3 additions & 3 deletions assets/{schema_input_multiregion.json → schema_multiregion.json}
@@ -1,8 +1,8 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/nf-core/ampliseq/master/assets/schema_input_multiregion.json",
"title": "nf-core/ampliseq pipeline - params.input_multiregion schema",
"description": "Schema for the file provided with params.input_multiregion",
"$id": "https://raw.githubusercontent.com/nf-core/ampliseq/master/assets/schema_multiregion.json",
"title": "nf-core/ampliseq pipeline - params.multiregion schema",
"description": "Schema for the file provided with params.multiregion",
"type": "array",
"items": {
"type": "object",
2 changes: 1 addition & 1 deletion conf/test_multiregion.config
@@ -22,7 +22,7 @@ params {
// Input data
input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/samplesheet_multiregion.tsv"
metadata = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/metadata_multiregion.tsv"
input_multiregion = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/regions_multiregion.tsv"
multiregion = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/regions_multiregion.tsv"
sidle_ref_taxonomy = "greengenes88"

// Prevent default taxonomic classification
4 changes: 2 additions & 2 deletions docs/usage.md
@@ -253,10 +253,10 @@ Instead of relying on one short amplicon, scaffolding multiple regions along a r

For example, multiple variable regions of the 16S rRNA gene were sequenced with various primers and need to be unified. This leads to one unified abundance and taxonomy profile over all variable regions. However, ASV sequences are only available separately per region; reconstructing complete de-novo sequences is not feasible.

Required inputs are sequencing data via [`--input`](#samplesheet-input), region primer and length information via [`--input_multiregion`](https://nf-co.re/ampliseq/parameters#input_multiregion), and a taxonomic database via [`--sidle_ref_taxonomy`](https://nf-co.re/ampliseq/parameters#sidle_ref_taxonomy) or [`--sidle_ref_tax_custom`](https://nf-co.re/ampliseq/parameters#sidle_ref_tax_custom).
Required inputs are sequencing data via [`--input`](#samplesheet-input), region primer and length information via [`--multiregion`](https://nf-co.re/ampliseq/parameters#multiregion), and a taxonomic database via [`--sidle_ref_taxonomy`](https://nf-co.re/ampliseq/parameters#sidle_ref_taxonomy) or [`--sidle_ref_tax_custom`](https://nf-co.re/ampliseq/parameters#sidle_ref_tax_custom).

```bash
--input "samplesheet_multiregion.tsv" --input_multiregion "regions_multiregion.tsv" --sidle_ref_taxonomy "silva=128"
--input "samplesheet_multiregion.tsv" --multiregion "regions_multiregion.tsv" --sidle_ref_taxonomy "silva=128"
```
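
For reference, a complete command line combining these options could look like the following sketch; the `-profile` choice and the `--outdir` value are placeholders to adapt to your own setup:

```bash
nextflow run nf-core/ampliseq \
    -profile docker \
    --input "samplesheet_multiregion.tsv" \
    --multiregion "regions_multiregion.tsv" \
    --sidle_ref_taxonomy "silva=128" \
    --outdir "results"
```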

The region information file can be tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml) and can have two to four columns/entries with the following headers:
13 changes: 8 additions & 5 deletions lib/WorkflowAmpliseq.groovy
@@ -15,7 +15,7 @@ class WorkflowAmpliseq {
Nextflow.error("Missing input declaration: One of `--input`, `--input_fasta`, `--input_folder` is required.")
}

if ( !params.input_multiregion && !params.input_fasta && (!params.FW_primer || !params.RV_primer) && !params.skip_cutadapt ) {
if ( !params.multiregion && !params.input_fasta && (!params.FW_primer || !params.RV_primer) && !params.skip_cutadapt ) {
Nextflow.error("Incompatible parameters: `--FW_primer` and `--RV_primer` are required for primer trimming. If primer trimming is not needed, use `--skip_cutadapt`.")
}

@@ -133,15 +133,18 @@
}

// When multi-region analysis is used, some parameter combinations are required or not allowed:
if ( params.input_multiregion ) {
if ( params.multiregion ) {
if ( !params.sidle_ref_taxonomy && !params.sidle_ref_tree_custom ) {
log.warn "Missing parameter: Either use `--sidle_ref_taxonomy` or `--sidle_ref_tree_custom` to get (unified) taxonomic classifications"
}
if ( (params.dada_ref_tax_custom || params.dada_ref_taxonomy) && !params.skip_dada_taxonomy ) {
Nextflow.error("Incompatible parameters: Multiple region analysis with `--input_multiregion` does not work with `--dada_ref_tax_custom`, `--dada_ref_taxonomy`")
Nextflow.error("Incompatible parameters: Multiple region analysis with `--multiregion` does not work with `--dada_ref_tax_custom`, `--dada_ref_taxonomy`")
}
if ( params.cut_its != "none" ) {
Nextflow.error("Incompatible parameters: Multiple region analysis with `--input_multiregion` does not work with `--cut_its`")
Nextflow.error("Incompatible parameters: Multiple region analysis with `--multiregion` does not work with `--cut_its`")
}
if ( params.vsearch_cluster || params.filter_ssu || params.min_len_asv || params.max_len_asv || params.filter_codons ) {
log.warn "Incompatible parameters: Multiple region analysis with `--input_multiregion` ignores any of `--vsearch_cluster`, `--filter_ssu`, `--min_len_asv`, `--max_len_asv`, `--filter_codons`, `--cut_its`"
log.warn "Incompatible parameters: Multiple region analysis with `--multiregion` ignores any of `--vsearch_cluster`, `--filter_ssu`, `--min_len_asv`, `--max_len_asv`, `--filter_codons`, `--cut_its`"
}
}
}
2 changes: 1 addition & 1 deletion nextflow.config
@@ -13,7 +13,7 @@ params {
input = null
input_fasta = null
input_folder = null
input_multiregion = null
multiregion = null
extension = "/*_R{1,2}_001.fastq.gz"
pacbio = false
iontorrent = false
47 changes: 27 additions & 20 deletions nextflow_schema.json
@@ -51,13 +51,13 @@
"help_text": "This is optional, but for performing downstream analysis such as barplots, diversity indices or differential abundance testing, a metadata file is essential.\n\nRelated parameter:\n- `--metadata_category` (optional) to choose columns that are used for testing significance\n\nFor example:\n\n```bash\n--metadata \"path/to/metadata.tsv\"\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The metadata file has to follow the QIIME2 specifications (https://docs.qiime2.org/2021.2/tutorials/metadata/)\n\nThe first column in the tab-separated metadata file is the sample identifier column (required header: `ID`) and defines the sample or feature IDs associated with your study. In addition to the sample identifier column, the metadata file is required to have at least one column with multiple different non-numeric values but not all unique.\n**NB**: without additional columns there might be no groupings for the downstream analyses.\n\nSample identifiers should be 36 characters long or less, and also contain only ASCII alphanumeric characters (i.e. in the range of [a-z], [A-Z], or [0-9]), or the dash (-) character. For downstream analysis, by default all numeric columns, blanks or NA are removed, and only columns with multiple different values but not all unique are selected.\n\nThe columns which are to be assessed can be specified by `--metadata_category`. If `--metadata_category` isn't specified than all columns that fit the specification are automatically chosen.",
"fa_icon": "fas fa-file-csv"
},
"input_multiregion": {
"multiregion": {
"type": "string",
"mimetype": "text/tsv",
"fa_icon": "fas fa-dna",
"description": "Path to tab-separated multi region sheet",
"help_text": "Path to multi region sheet, either tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml).",
"schema": "assets/schema_input_multiregion.json"
"help_text": "Path to multi region sheet, either tab-separated (.tsv), comma-separated (.csv), or in YAML format (.yml/.yaml).\n\nChoose a reerence taxonomy with `--sidle_ref_taxonomy` or use `--sidle_ref_tax_custom` and `--sidle_ref_tree_custom`.",
"schema": "assets/schema_multiregion.json"
},
"outdir": {
"type": "string",
@@ -340,7 +340,7 @@
"taxonomic_database": {
"title": "Taxonomic database",
"type": "object",
"default": "",
"default": "Choose a method and database for taxonomic assignments to single-region amplicons",
"properties": {
"dada_ref_taxonomy": {
"type": "string",
@@ -504,22 +504,6 @@
"minimum": 0,
"maximum": 1
},
"sidle_ref_taxonomy": {
"type": "string",
"help_text": "",
"description": "Name of supported database, and optionally also version number",
"enum": ["silva", "silva=128", "greengenes", "greengenes=13_8", "greengenes88"]
},
"sidle_ref_tax_custom": {
"type": "string",
"help_text": "Consider also setting `--sidle_ref_tree_custom`. Example usage: `--sidle_ref_tax_custom 'rep_set_99.fasta,rep_set_aligned_99.fasta,taxonomy_99_taxonomy.txt'`",
"description": "Comma separated paths to three files: reference taxonomy sequences (*.fasta), reference taxonomy strings (*.txt)"
},
"sidle_ref_tree_custom": {
"type": "string",
"help_text": "Overwrites tree chosen by `--sidle_ref_taxonomy`",
"description": "Path to SIDLE reference taxonomy tree (*.qza)"
},
"sintax_ref_taxonomy": {
"type": "string",
"help_text": "Choose any of the supported databases, and optionally also specify the version. Database and version are separated by an equal sign (`=`, e.g. `coidb=221216`) . This will download the desired database and initiate taxonomic classification with VSEARCH sintax and the chosen database, which if needed is formatted to produce a file that is compatible with VSEARCH sintax.\n\nThe following databases are supported:\n- COIDB - eukaryotic Cytochrome Oxidase I (COI) from The Barcode of Life Data System (BOLD) - COI\n- UNITE - eukaryotic nuclear ribosomal ITS region - ITS\n\nGenerally, using `coidb`, `unite-fungi`, or `unite-alleuk` will select the most recent supported version.",
@@ -557,6 +541,29 @@
},
"fa_icon": "fas fa-database"
},
"multiregion_taxonomic_database": {
"title": "Multi-region taxonomic database",
"type": "object",
"default": "Choose database for taxonomic assignments with multi-region amplicons using SIDLE",
"properties": {
"sidle_ref_taxonomy": {
"type": "string",
"help_text": "",
"description": "Name of supported database, and optionally also version number",
"enum": ["silva", "silva=128", "greengenes", "greengenes=13_8", "greengenes88"]
},
"sidle_ref_tax_custom": {
"type": "string",
"help_text": "Consider also setting `--sidle_ref_tree_custom`. Example usage: `--sidle_ref_tax_custom 'rep_set_99.fasta,rep_set_aligned_99.fasta,taxonomy_99_taxonomy.txt'`",
"description": "Comma separated paths to three files: reference taxonomy sequences (*.fasta), reference taxonomy strings (*.txt)"
},
"sidle_ref_tree_custom": {
"type": "string",
"help_text": "Overwrites tree chosen by `--sidle_ref_taxonomy`",
"description": "Path to SIDLE reference taxonomy tree (*.qza)"
}
}
},
"asv_filtering": {
"title": "ASV filtering",
"type": "object",
32 changes: 16 additions & 16 deletions workflows/ampliseq.nf
@@ -177,7 +177,7 @@ if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1)
}

//only run QIIME2 downstream analysis when taxonomy is actually calculated and all required data is available
if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && !params.skip_qiime_downstream && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom || params.input_multiregion) ) {
if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime && !params.skip_qiime_downstream && (!params.skip_dada_taxonomy || params.sintax_ref_taxonomy || params.qiime_ref_taxonomy || params.qiime_ref_tax_custom || params.kraken2_ref_taxonomy || params.kraken2_ref_tax_custom || params.multiregion) ) {
run_qiime2 = true
} else {
run_qiime2 = false
@@ -310,10 +310,10 @@ workflow AMPLISEQ {
//
// Add primer info to sequencing files
//
if ( params.input_multiregion ) {
if ( params.multiregion ) {
// is multiple region analysis
ch_input_reads
.combine( Channel.fromSamplesheet("input_multiregion") )
.combine( Channel.fromSamplesheet("multiregion") )
.map{ info, reads, multi ->
def meta = info + multi
return [ meta, reads ] }
@@ -446,7 +446,7 @@ workflow AMPLISEQ {
//
// SUBWORKFLOW / MODULES : Taxonomic classification with DADA2, SINTAX and/or QIIME2
//
if ( params.input_multiregion ) {
if ( params.multiregion ) {
// separate sequences and abundances when several regions
DADA2_SPLITREGIONS (
//DADA2_DENOISING per run & region -> per run
@@ -473,10 +473,10 @@
ch_dada2_asv = SIDLE_WF.out.table_tsv
ch_dada2_fasta = Channel.empty()
// Any ASV post-clustering param is not allowed:
// - solved by '!params.input_multiregion' for vsearch_cluster, filter_ssu, min_len_asv, max_len_asv, filter_codons
// - solved by '!params.multiregion' for vsearch_cluster, filter_ssu, min_len_asv, max_len_asv, filter_codons
// - solved in 'lib/WorkflowAmpliseq.groovy': cut_its
// Must have params:
// - solved by '!params.input_multiregion' for skip_report
// - solved by '!params.multiregion' for skip_report
// - solved in 'lib/WorkflowAmpliseq.groovy': skip_dada_taxonomy
} else {
// forward results to downstream analysis if single region
Expand All @@ -487,7 +487,7 @@ workflow AMPLISEQ {
//
// MODULE : ASV post-clustering with VSEARCH
//
if (params.vsearch_cluster && !params.input_multiregion) {
if (params.vsearch_cluster && !params.multiregion) {
ch_fasta_for_clustering = ch_dada2_fasta
.map {
fasta ->
@@ -515,7 +515,7 @@
//
// Modules : Filter rRNA
//
if ( !params.skip_barrnap && params.filter_ssu && !params.input_multiregion ) {
if ( !params.skip_barrnap && params.filter_ssu && !params.multiregion ) {
BARRNAP ( ch_unfiltered_fasta )
BARRNAPSUMMARY ( BARRNAP.out.gff.collect() )
BARRNAPSUMMARY.out.warning.subscribe {
@@ -530,7 +530,7 @@
ch_stats = MERGE_STATS_FILTERSSU.out.tsv
ch_dada2_fasta = FILTER_SSU.out.fasta
ch_dada2_asv = FILTER_SSU.out.asv
} else if ( !params.skip_barrnap && !params.filter_ssu && !params.input_multiregion ) {
} else if ( !params.skip_barrnap && !params.filter_ssu && !params.multiregion ) {
BARRNAP ( ch_unfiltered_fasta )
BARRNAPSUMMARY ( BARRNAP.out.gff.collect() )
BARRNAPSUMMARY.out.warning.subscribe { if ( it.baseName.toString().startsWith("WARNING") ) log.warn "Barrnap could not identify any rRNA in the ASV sequences. We recommended to use the --skip_barrnap option for these sequences." }
@@ -545,7 +545,7 @@
//
// Modules : amplicon length filtering
//
if ( (params.min_len_asv || params.max_len_asv) && !params.input_multiregion ) {
if ( (params.min_len_asv || params.max_len_asv) && !params.multiregion ) {
FILTER_LEN_ASV ( ch_dada2_fasta, ch_dada2_asv.ifEmpty( [] ) )
ch_versions = ch_versions.mix(FILTER_LEN_ASV.out.versions.ifEmpty(null))
MERGE_STATS_FILTERLENASV ( ch_stats, FILTER_LEN_ASV.out.stats )
@@ -559,7 +559,7 @@
//
// Modules : Filtering based on codons in an open reading frame
//
if ( params.filter_codons && !params.input_multiregion ) {
if ( params.filter_codons && !params.multiregion ) {
FILTER_CODONS ( ch_dada2_fasta, ch_dada2_asv.ifEmpty( [] ) )
ch_versions = ch_versions.mix(FILTER_CODONS.out.versions.ifEmpty(null))
MERGE_STATS_CODONS( ch_stats, FILTER_CODONS.out.stats )
@@ -701,7 +701,7 @@ workflow AMPLISEQ {
// Import phylogenetic tree into QIIME2
if ( params.pplace_tree ) {
ch_tree = QIIME2_INTREE ( FASTA_NEWICK_EPANG_GAPPA.out.grafted_phylogeny ).qza
} else if (params.input_multiregion) {
} else if (params.multiregion) {
ch_tree = SIDLE_WF.out.tree_qza
} else { ch_tree = [] }

@@ -712,7 +712,7 @@
ch_tax = Channel.empty()
tax_agglom_min = 1
tax_agglom_max = 2
} else if ( params.input_multiregion ) {
} else if ( params.multiregion ) {
log.info "Use multi-region SIDLE taxonomy classification"
val_used_taxonomy = "SIDLE"
ch_tax = SIDLE_WF.out.tax_qza
@@ -916,7 +916,7 @@ workflow AMPLISEQ {
//
// MODULE: Summary Report
//
if (!params.skip_report && !params.input_multiregion) {
if (!params.skip_report && !params.multiregion) {
SUMMARY_REPORT (
ch_report_template,
ch_report_css,
@@ -993,9 +993,9 @@ workflow AMPLISEQ {
file("${params.outdir}/input").mkdir()
file("${params.input_fasta}").copyTo("${params.outdir}/input")
}
if ( params.input_multiregion ) {
if ( params.multiregion ) {
file("${params.outdir}/input").mkdir()
file("${params.input_multiregion}").copyTo("${params.outdir}/input")
file("${params.multiregion}").copyTo("${params.outdir}/input")
}
//Save metadata in results folder
if ( params.metadata ) {