Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add module telseq #6678

Merged
merged 16 commits into from
Sep 20, 2024
6 changes: 6 additions & 0 deletions modules/nf-core/telseq/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
# Conda environment for the telseq module.
# Channels are listed in lookup order: conda-forge first, then bioconda.
channels:
  - conda-forge
  - bioconda

# Both tools are pinned to an exact version.
# samtools is what the module script calls to decode CRAM input before telseq.
dependencies:
  - bioconda::samtools=1.20
  - bioconda::telseq=0.0.2
62 changes: 62 additions & 0 deletions modules/nf-core/telseq/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// TELSEQ: run telseq on one alignment file and keep only the result table.
//
// Inputs
//   [meta,  bam, bai] : alignment and its index. A file whose name ends in
//                       ".cram" is decoded to uncompressed BAM with samtools
//                       and streamed to telseq; anything else is handed to
//                       telseq directly.
//   [meta2, fasta]    : reference FASTA, used only as `samtools view -T` in the
//                       CRAM branch. May be empty ([]).
//   [meta3, fai]      : FASTA index. Staged in the work dir but never referenced
//                       by name in the script (presumably picked up by samtools
//                       next to the FASTA - confirm).
//   [meta4, bed]      : optional BED file, forwarded as --exomebed when present.
//
// Outputs
//   output   : [meta, "<prefix>.telseq.tsv"]
//   versions : versions.yml listing the telseq and samtools versions
process TELSEQ {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-5ce2a0c04652b0d0cc87f012a2240e1e5a90bc90:eb084e8aa92146f3987d00af8d38b36214d1f39f-0':
        'biocontainers/mulled-v2-5ce2a0c04652b0d0cc87f012a2240e1e5a90bc90:eb084e8aa92146f3987d00af8d38b36214d1f39f-0' }"

    input:
    tuple val(meta ), path(bam), path(bai)
    tuple val(meta2), path(fasta)
    tuple val(meta3), path(fai)
    tuple val(meta4), path(bed)

    output:
    tuple val(meta), path("*.telseq.tsv"), emit: output
    path "versions.yml"                  , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args      = task.ext.args ?: ''
    def prefix    = task.ext.prefix ?: "${meta.id}"
    def exome     = bed ? " --exomebed=${bed}" : ""
    // An absent reference arrives as an empty list; interpolating it directly
    // would render the literal text `-T []`, so only emit the flag when a
    // FASTA was actually supplied.
    def reference = fasta ? "-T ${fasta}" : ""
    """
    # telseq doesn't support CRAM. See https://github.com/zd1/telseq/issues/26
    # The condition below is resolved by Groovy to the shell builtin
    # 'true' or 'false' before the script runs.
    if ${bam.name.endsWith(".cram")}
    then
        samtools view ${reference} -O BAM --uncompressed ${bam} |\\
            telseq ${args} ${exome} - > tmp.tsv
    else
        telseq ${args} ${exome} ${bam} > tmp.tsv
    fi

    #
    # 'bug' in telseq, messages that should be printed on stderr are printed on stdout
    # We remove them with awk: nothing is printed until the header line
    # starting with 'ReadGroup' has been seen.
    #
    awk '/^ReadGroup/ {ok=1;} {if(ok) print;}' tmp.tsv > ${prefix}.telseq.tsv
    rm tmp.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        telseq: \$(telseq --help 2>&1 | grep "^Version" -m1 | cut -d ' ' -f2)
        samtools: \$(samtools --version |& sed '1!d ; s/samtools //')
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.telseq.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        telseq: \$(telseq --help 2>&1 | grep "^Version" -m1 | cut -d ' ' -f2)
        samtools: \$(samtools --version |& sed '1!d ; s/samtools //')
    END_VERSIONS
    """
}
89 changes: 89 additions & 0 deletions modules/nf-core/telseq/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module description for TELSEQ. The input/output entries below mirror, in
# order, the channels declared in the module's main.nf.
name: "telseq"
description: "Telseq: a software for calculating telomere length"
keywords:
  - bam
  - cram
  - genomics
  - samtools
  - telomere
  - telseq
tools:
  - "telseq":
      description: "A software for calculating telomere length"
      homepage: "https://github.com/zd1/telseq"
      documentation: "https://github.com/zd1/telseq"
      tool_dev_url: "https://github.com/zd1/telseq"
      doi: "10.1093/nar/gku181"
      licence: ["GPL v3"]
      args_id: "$args"
      identifier: ""
  - samtools:
      description: Tools for dealing with SAM, BAM and CRAM files
      homepage: http://www.htslib.org/
      documentation: http://www.htslib.org/doc/samtools.html
      tool_dev_url: https://github.com/samtools/samtools
      doi: "10.1093/bioinformatics/btp352"
      licence: ["MIT"]
      identifier: "biotools:samtools"
input:
  # Channel 1: alignment + index
  # The key is `meta` with no trailing whitespace, matching `val(meta)`.
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. [ id:'genome' ]
    - bam:
        type: file
        description: BAM/CRAM/SAM file
        pattern: "*.{bam,cram,sam}"
    - bai:
        type: file
        description: BAM/CRAM index file
        pattern: "*.{bai,crai}"
  # Channel 2: reference sequence
  - - meta2:
        type: map
        description: |
          Groovy Map containing reference information
          e.g. [ id:'genome' ]
    - fasta:
        type: file
        description: Reference genome file
        pattern: "*.{fa,fasta}"
  # Channel 3: reference index
  - - meta3:
        type: map
        description: |
          Groovy Map containing reference index information
          e.g. [ id:'genome' ]
    - fai:
        type: file
        description: Fasta index file
        pattern: "*.fai"
  # Channel 4: optional exome regions
  - - meta4:
        type: map
        description: |
          Groovy Map containing bed information
          e.g. [ id:'genome' ]
    - bed:
        type: file
        description: Optional exome regions in BED format. These regions will be excluded
        pattern: "*.bed"
output:
  - output:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
      # Pattern matches the glob the process actually emits.
      - "*.telseq.tsv":
          type: file
          description: Telseq output
          pattern: "*.telseq.tsv"
  - versions:
      - versions.yml:
          type: file
          description: File containing software versions
          pattern: "versions.yml"
authors:
  - "@lindenb"
maintainers:
  - "@lindenb"
122 changes: 122 additions & 0 deletions modules/nf-core/telseq/tests/main.nf.test
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe you could add a test that also supplies the 4th input, the BED file?

Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
// nf-core modules test telseq
//
// nf-test suite for the TELSEQ process. Four cases: CRAM input, BAM input,
// BAM input with a BED file on the 4th channel, and a stub run.
//
// NOTE: the test names say "sarscov2" although every fixture comes from
// genomics/homo_sapiens. The names are left untouched because the snapshot
// file is keyed on them.
//
// All fixture paths are appended to params.modules_testdata_base_path without
// a leading '/', so every input in this file is resolved the same way.
nextflow_process {

    name "Test Process TELSEQ"
    script "../main.nf"
    process "TELSEQ"

    tag "modules"
    tag "modules_nfcore"
    tag "telseq"

    // CRAM input, no BED.
    test("sarscov2 - cram") {
        when {
            process {
                """
                input[0] = [
                    [ id:'cram' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true)
                ]
                input[1] = [ [] , file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
                input[2] = [ [] , file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ]
                input[3] = [ [] , [] ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    process.out.output[0][1],
                    process.out.versions).match() }
            )
        }
    }

    // BAM input, no BED.
    test("sarscov2 - bam") {
        when {
            process {
                """
                input[0] = [
                    [ id:'bam' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
                ]
                input[1] = [ [] , file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
                input[2] = [ [] , file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ]
                input[3] = [ [] , [] ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    process.out.output[0][1],
                    process.out.versions).match() }
            )
        }
    }

    // BAM input with a BED file supplied on the 4th channel.
    test("sarscov2 - bam - bed") {
        when {
            process {
                """
                input[0] = [
                    [ id:'bambed' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
                ]
                input[1] = [ [] , file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
                input[2] = [ [] , file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ]
                input[3] = [ [] , file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    process.out.output[0][1],
                    process.out.versions).match() }
            )
        }
    }

    // Stub run: exercises the process's stub block instead of the real script.
    test("sarscov2 - bam - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id:'stub', single_end:false ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
                ]
                input[1] = [ [] , file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
                input[2] = [ [] , file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ]
                input[3] = [ [] , [] ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    process.out.output[0][1],
                    process.out.versions).match() }
            )
        }

    }

}
54 changes: 54 additions & 0 deletions modules/nf-core/telseq/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"sarscov2 - bam - stub": {
"content": [
"stub.telseq.tsv:md5,d41d8cd98f00b204e9800998ecf8427e",
[
"versions.yml:md5,f0569759735b18ce1064c9ed2ddc9610"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-20T14:09:45.936712383"
},
"sarscov2 - cram": {
"content": [
"cram.telseq.tsv:md5,82005b49e17f114e8ad759dd9d520912",
[
"versions.yml:md5,f0569759735b18ce1064c9ed2ddc9610"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-20T14:09:09.775504172"
},
"sarscov2 - bam": {
"content": [
"bam.telseq.tsv:md5,82005b49e17f114e8ad759dd9d520912",
[
"versions.yml:md5,f0569759735b18ce1064c9ed2ddc9610"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-20T14:09:27.747371336"
},
"sarscov2 - bam - bed": {
"content": [
"bambed.telseq.tsv:md5,58374f99d395fa64ea561aca73fd6c89",
[
"versions.yml:md5,f0569759735b18ce1064c9ed2ddc9610"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-20T14:09:27.747371336"
}
}
2 changes: 2 additions & 0 deletions modules/nf-core/telseq/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Maps the `telseq` tag to the path glob covering every file of the module.
telseq:
  - 'modules/nf-core/telseq/**'
Loading