Skip to content

Commit

Permalink
updated modules to not have md5s, added decision logic to processing …
Browse files Browse the repository at this point in the history
…module, updated snapshots
  • Loading branch information
SimonDMurray committed Mar 20, 2024
1 parent 1230a7b commit 7bba07f
Show file tree
Hide file tree
Showing 7 changed files with 14 additions and 24 deletions.
4 changes: 3 additions & 1 deletion RESUME
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ set -euo pipefail

#nextflow run main.nf -profile docker,local,test --clean false -resume

nextflow run main.nf --input all-fqs.csv -profile docker,local --clean false -resume --r_processing true \
#nextflow run main.nf --input all-fqs.csv -profile docker,local --clean false -resume --r_processing true \

nextflow run main.nf --input all-fqs.csv -profile docker,local --clean false -resume \
--FW_primer "ATGCGATACTTGGTGTGAAT" --RV_primer "GCATATCAATAAGCGGAGGA" --single_end false --retain_untrimmed true \
--fastq_maxee 0.5 --fastq_minlen 250 --fastq_maxns 0 --fasta_width 0 \
--minuniquesize 2 --derep_strand "plus" --sizeout \
Expand Down
13 changes: 6 additions & 7 deletions modules/local/processing.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,16 @@ process PROCESSING {
input:
tuple val(meta), path(sintax_tsv)

//Only process files that have taxonomy predictions
when:
sintax_tsv.size() > 0

output:
tuple val(meta), path('*.classified.tsv') , emit: fasta

script:
"""
if [ ! -s ${sintax_tsv} ]; then
echo "${sintax_tsv} has no sintax predictions" > "${meta.id}.classified.tsv"
else
echo -e "sample\tkingdom\tprob_kingdom\tdivision\tprob_division\tclade\tprob_clade\torder\tprob_order\tfamily\tprob_family\tgenus\tprob_genus\tspecies\tprob_species\tsize" > "${meta.id}.classified.tsv"
while read id size <&3 && read k p c o f g s <&4; do echo -e "\${id}\t\$(echo \${k} | sed 's/k://g' | sed 's/)//g' | tr "(" "\t")\t\$(echo \${p} | sed 's/p://g' | sed 's/)//g' | tr "(" "\t")\t\$(echo \${c} | sed 's/c://g' | sed 's/)//g' | tr "(" "\t")\t\$(echo \${o} | sed 's/o://g' | sed 's/)//g' | tr "(" "\t")\t\$(echo \${f} | sed 's/f://g' | sed 's/)//g' | tr "(" "\t")\t\$(echo \${g} | sed 's/g://g' | sed 's/)//g' | tr "(" "\t")\t\$(echo \${s} | sed 's/s://g' | sed 's/)//g' | tr "(" "\t")\t\$(echo \${size} | sed 's/size=//g')"; done 3< <(cut -f 1 ${sintax_tsv} | tr ";" "\t") 4< <(cut -f 2 ${sintax_tsv} | tr "," "\t") >> "${meta.id}.classified.tsv"
fi
md5sum "${meta.id}.classified.tsv" > "${meta.id}.classified.tsv.md5"
echo -e "sample\tkingdom\tprob_kingdom\tdivision\tprob_division\tclade\tprob_clade\torder\tprob_order\tfamily\tprob_family\tgenus\tprob_genus\tspecies\tprob_species\tsize" > "${meta.id}.classified.tsv"
while read id size <&3 && read k p c o f g s <&4; do echo -e "\${id}\t\$(echo \${k} | sed 's/k://g' | sed 's/)//g' | tr "(" "\t")\t\$(echo \${p} | sed 's/p://g' | sed 's/)//g' | tr "(" "\t")\t\$(echo \${c} | sed 's/c://g' | sed 's/)//g' | tr "(" "\t")\t\$(echo \${o} | sed 's/o://g' | sed 's/)//g' | tr "(" "\t")\t\$(echo \${f} | sed 's/f://g' | sed 's/)//g' | tr "(" "\t")\t\$(echo \${g} | sed 's/g://g' | sed 's/)//g' | tr "(" "\t")\t\$(echo \${s} | sed 's/s://g' | sed 's/)//g' | tr "(" "\t")\t\$(echo \${size} | sed 's/size=//g')"; done 3< <(cut -f 1 ${sintax_tsv} | tr ";" "\t") 4< <(cut -f 2 ${sintax_tsv} | tr "," "\t") >> "${meta.id}.classified.tsv"
"""
}
3 changes: 0 additions & 3 deletions modules/local/vsearch_derep.nf
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@ process VSEARCH_DEREP_FULL_LENGTH {
--uc ${prefix}.derep.uc \\
--output ${prefix}.derep.fasta 2>&1 | tee ${prefix}.derep.log
md5sum "${prefix}.derep.fasta" > "${prefix}.derep.fasta.md5"
md5sum "${prefix}.derep.uc" > "${prefix}.derep.uc.md5"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
2 changes: 0 additions & 2 deletions modules/local/vsearch_fastq_filter.nf
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ process VSEARCH_FASTQ_FILTER {
$args \\
--fastaout ${prefix}.filtered.fasta 2>&1 | tee ${prefix}.filtered.log
md5sum "${prefix}.filtered.fasta" > "${prefix}.filtered.fasta.md5"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
vsearch: \$(vsearch --version 2>&1 | head -n 1 | sed 's/vsearch //g' | sed 's/,.*//g' | sed 's/^v//' | sed 's/_.*//')
Expand Down
2 changes: 1 addition & 1 deletion tests/modules/local/processing.nf.test.snap
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,6 @@
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-03-18T14:36:04.799821"
"timestamp": "2024-03-20T11:34:38.230732"
}
}
2 changes: 1 addition & 1 deletion tests/modules/local/r_processing.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ nextflow_process {
assert fasta.size() == 1
assert fasta[0].size() == 2
assert fasta[0][0].size() == 2
assert path(fasta[0].get(1)).md5 == '608d9417c5fc59d20087c3575935cec3'
assert path(fasta[0].get(1)).md5 == 'd7536874f239e304cca7667dc1ffde4d'
}
}

Expand Down
12 changes: 3 additions & 9 deletions tests/modules/local/r_processing.nf.test.snap
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -8,30 +8,24 @@
"id": "ERR2537816",
"single_end": false
},
"ERR2537816.classified.tsv:md5,608d9417c5fc59d20087c3575935cec3"
"ERR2537816.classified.tsv:md5,d7536874f239e304cca7667dc1ffde4d"
]
],
"1": [
"versions.yml:md5,c5cafb5677872ce5c2c7cd3e9bd13e19"
],
"fasta": [
[
{
"id": "ERR2537816",
"single_end": false
},
"ERR2537816.classified.tsv:md5,608d9417c5fc59d20087c3575935cec3"
"ERR2537816.classified.tsv:md5,d7536874f239e304cca7667dc1ffde4d"
]
],
"versions": [
"versions.yml:md5,c5cafb5677872ce5c2c7cd3e9bd13e19"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-03-18T14:36:08.809864"
"timestamp": "2024-03-20T11:13:21.069775"
}
}

0 comments on commit 7bba07f

Please sign in to comment.