Skip to content

Commit

Permalink
add vsearch and script for plotting quality files
Browse files Browse the repository at this point in the history
  • Loading branch information
cjfields committed Apr 5, 2024
1 parent f034ddd commit 7242b4c
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 11 deletions.
31 changes: 31 additions & 0 deletions bin/plot_quality_profile.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env Rscript
# Plot read quality profiles for one sample with dada2.
#
# Lists files in the current working directory whose names match
# --file_pattern, passes them to dada2::plotQualityProfile(), and writes
#   <id>.qualities.pdf  - the rendered plot
#   <id>.qualities.RDS  - the serialized plot object, for later reuse
suppressPackageStartupMessages(library(dada2))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(optparse))

option_list <- list(
    make_option(c("-i", "--id"),
        type = "character",
        help = "ID of sample"),
    # NOTE: the value is handed to list.files(pattern = ...), so it is
    # interpreted as a regular expression, not as a shell glob.
    make_option(c("-f", "--file_pattern"),
        type = "character",
        default = "fastq.gz",
        help = "File pattern [default \"%default\"]"),
    # NOTE(review): --session and --yaml are parsed but nothing below acts
    # on them yet; no versions YAML is written by this script.
    make_option(c("--session"),
        action = "store_true",
        help = "Generate nf-core compliant YAML file w/ version information"),
    make_option(c("--yaml"),
        default = "versions.yml",
        help = "YAML file name (see --session)")
)

opt <- parse_args(OptionParser(option_list = option_list))

# Without an ID the outputs would be written as the hidden files
# ".qualities.pdf" / ".qualities.RDS"; fail early with a clear message.
if (is.null(opt$id) || !nzchar(opt$id)) {
    stop("A sample ID is required (-i/--id)", call. = FALSE)
}

fns <- list.files(pattern = opt$file_pattern, full.names = TRUE)

# Report an empty match explicitly rather than letting the plotting call
# fail (or produce an empty plot) further down.
if (length(fns) == 0) {
    stop(sprintf("No files matching pattern '%s' found in '%s'",
                 opt$file_pattern, getwd()),
         call. = FALSE)
}

pl <- plotQualityProfile(fns)
ggsave(paste0(opt$id, ".qualities.pdf"), plot = pl, device = "pdf")

# we may revisit the quality scores and other info in this plot for other purposes
saveRDS(pl, paste0(opt$id, ".qualities.RDS"))
67 changes: 67 additions & 0 deletions modules/local/vsearch_eestats.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// TODO: I'm currently checking the feasibility of using these outputs
// for additional QC plots, especially of cumulative expected errors;
// for now they are little more than a data dump

/*
 * Run vsearch read-quality statistics on each read file of a sample.
 *
 * Input:
 *   meta  - sample map; `meta.id` names the outputs and `meta.single_end`
 *           selects whether only R1 or both R1 and R2 are processed
 *   reads - read file(s); reads[0] is treated as R1 and reads[1] as R2
 *
 * Output (one file per processed read, R1 and optionally R2):
 *   stats    - <id>.<R1|R2>.stats     log written by `vsearch --fastq_stats`
 *   eestats  - <id>.<R1|R2>.eestats   output of `vsearch --fastq_eestats`
 *   eestats2 - <id>.<R1|R2>.eestats2  output of `vsearch --fastq_eestats2`
 */
process VSEARCH_EESTATS {
    tag "$meta.id"
    label 'process_low'

    container "quay.io/biocontainers/vsearch:2.27.0--h6a68c12_1"

    input:
    tuple val(meta), path(reads)

    output:
    path("${meta.id}.{R1,R2}.stats"), emit: stats
    path("${meta.id}.{R1,R2}.eestats"), emit: eestats
    path("${meta.id}.{R1,R2}.eestats2"), emit: eestats2

    when:
    task.ext.when == null || task.ext.when

    script:
    // R1 is always processed; R2 only for paired-end samples.
    def mates = meta.single_end ? [R1: reads[0]] : [R1: reads[0], R2: reads[1]]
    // Commands are grouped by type (stats, eestats, eestats2), R1 before R2.
    def stats_cmds = mates.collect { mate, fastq ->
        "vsearch --fastq_stats ${fastq} --log ${meta.id}.${mate}.stats"
    }
    def eestats_cmds = mates.collect { mate, fastq ->
        "vsearch --fastq_eestats ${fastq} --output ${meta.id}.${mate}.eestats"
    }
    def eestats2_cmds = mates.collect { mate, fastq ->
        "vsearch --fastq_eestats2 ${fastq} --ee_cutoffs \"0.5,1.0,2.0,4.0,8.0\" --output ${meta.id}.${mate}.eestats2"
    }
    def commands = (stats_cmds + eestats_cmds + eestats2_cmds).join('\n')
    """
    ${commands}
    """

    stub:
    // Create exactly the files the output declarations expect; the names
    // are built from meta.id to match the `output:` patterns above.
    def mate_tags = meta.single_end ? ['R1'] : ['R1', 'R2']
    def stub_files = mate_tags.collectMany { mate ->
        ['stats', 'eestats', 'eestats2'].collect { ext -> "${meta.id}.${mate}.${ext}" }
    }
    """
    touch ${stub_files.join(' ')}
    """
}
23 changes: 12 additions & 11 deletions workflows/tada.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,17 @@

include { FASTQC } from '../modules/nf-core/fastqc/main'
include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { PLOTQUALITYPROFILE } from '../modules/local/plotqualityprofile'
include { PLOT_QUALITY_PROFILE } from '../modules/local/plotqualityprofile'
include { VSEARCH_EESTATS } from '../modules/local/vsearch_eestats'

include { FILTER_AND_TRIM } from '../subworkflows/local/filter_and_trim'
include { DADA2_DENOISE } from '../subworkflows/local/dada2_denoise'

// // TODO: move to a subworkflow and implement pooled vs per-sample + optional priors
// include { LEARNERRORS } from '../modules/local/learnerrors'
// include { DADAINFER } from '../modules/local/dadainfer'
// include { POOLEDSEQTABLE } from '../modules/local/pooledseqtable'
// include { REMOVECHIMERAS } from '../modules/local/removechimeras'
// include { RENAMEASVS } from '../modules/local/renameasvs'
include { ASSIGNTAXASPECIES } from '../modules/local/assigntaxaspecies'
// TODO: may want to move into a subworkflow since we will likely implement a
// few additional methods (q2-feature-classifier, IDTAXA, etc)
include { ASSIGN_TAXA_SPECIES } from '../modules/local/assigntaxaspecies'

// TODO: Move into phylogenetic subworkflow
include { DECIPHER } from '../modules/local/decipher'
include { PHANGORN } from '../modules/local/phangorn'
include { FASTTREE } from '../modules/local/fasttree'
Expand Down Expand Up @@ -109,7 +108,11 @@ workflow TADA {

// TODO: we may want to allow aggregation of the read files for larger projects;
// current version is per read per sample
PLOTQUALITYPROFILE (
PLOT_QUALITY_PROFILE (
ch_samplesheet
)

VSEARCH_EESTATS (
ch_samplesheet
)

Expand Down Expand Up @@ -164,7 +167,6 @@ workflow TADA {
)

ch_tree = Channel.empty()
ch_tool = Channel.empty()

// this seems like the sort of thing a function map
// would be useful for...
Expand All @@ -188,7 +190,6 @@ workflow TADA {
)

// QC

READ_TRACKING(
FILTER_AND_TRIM.out.trimmed_report,
DADA2_DENOISE.out.seqtable_renamed,
Expand Down

0 comments on commit 7242b4c

Please sign in to comment.