From 0fe30831abbc2ed115e46e92330edf38f56edc3d Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Tue, 19 Mar 2024 13:01:20 +0100 Subject: [PATCH] add cram support to bowtie2 (#5180) * add cram support to bowtie2 * fix tests * fastq_align_bowtie2 > nf-test * fix tests * more robust test * fix cram tests --- modules/nf-core/bowtie2/align/main.nf | 26 ++- .../bowtie2/align/tests/cram_crai.config | 5 + .../nf-core/bowtie2/align/tests/main.nf.test | 162 +++++++++---- .../bowtie2/align/tests/main.nf.test.snap | 216 +++++++++++------- .../nf-core/fastq_align_bowtie2/main.nf | 6 +- .../fastq_align_bowtie2/tests/main.nf.test | 99 ++++++++ .../tests/main.nf.test.snap | 126 ++++++++++ .../tests}/nextflow.config | 9 - .../fastq_align_bowtie2/tests/tags.yml | 2 + subworkflows/nf-core/fastq_align_dna/main.nf | 12 +- tests/config/pytest_modules.yml | 3 - .../nf-core/fastq_align_bowtie2/main.nf | 44 ---- .../nf-core/fastq_align_bowtie2/test.yml | 63 ----- 13 files changed, 507 insertions(+), 266 deletions(-) create mode 100644 modules/nf-core/bowtie2/align/tests/cram_crai.config create mode 100644 subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test create mode 100644 subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test.snap rename {tests/subworkflows/nf-core/fastq_align_bowtie2 => subworkflows/nf-core/fastq_align_bowtie2/tests}/nextflow.config (56%) create mode 100644 subworkflows/nf-core/fastq_align_bowtie2/tests/tags.yml delete mode 100644 tests/subworkflows/nf-core/fastq_align_bowtie2/main.nf delete mode 100644 tests/subworkflows/nf-core/fastq_align_bowtie2/test.yml diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf index 8c405ee386c..96a7027ddbe 100644 --- a/modules/nf-core/bowtie2/align/main.nf +++ b/modules/nf-core/bowtie2/align/main.nf @@ -10,13 +10,18 @@ process BOWTIE2_ALIGN { input: tuple val(meta) , path(reads) tuple val(meta2), path(index) + tuple val(meta3), path(fasta) val save_unaligned val sort_bam output: - tuple val(meta), path("*.{bam,sam}"), emit: aligned + tuple val(meta), path("*.sam") , emit: sam , optional:true + tuple val(meta), path("*.bam") , emit: bam , optional:true + tuple val(meta), path("*.cram") , emit: cram , optional:true + tuple val(meta), path("*.csi") , emit: csi , optional:true + tuple val(meta), path("*.crai") , emit: crai , optional:true tuple val(meta), path("*.log") , emit: log - tuple val(meta), path("*fastq.gz") , emit: fastq, optional:true + tuple val(meta), path("*fastq.gz") , emit: fastq , optional:true path "versions.yml" , emit: versions when: @@ -39,7 +44,10 @@ process BOWTIE2_ALIGN { def samtools_command = sort_bam ? 'sort' : 'view' def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ - def extension = (args2 ==~ extension_pattern) ? (args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? "--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" """ INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` @@ -53,7 +61,7 @@ process BOWTIE2_ALIGN { $unaligned \\ $args \\ 2> >(tee ${prefix}.bowtie2.log >&2) \\ - | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.${extension} - + | samtools $samtools_command $args2 --threads $task.cpus ${reference} -o ${prefix}.${extension} - if [ -f ${prefix}.unmapped.fastq.1.gz ]; then mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz @@ -82,9 +90,19 @@ process BOWTIE2_ALIGN { } else { create_unmapped = save_unaligned ? "touch ${prefix}.unmapped_1.fastq.gz && touch ${prefix}.unmapped_2.fastq.gz" : "" } + def reference = fasta && extension=="cram" ? "--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } """ touch ${prefix}.${extension} + ${create_index} touch ${prefix}.bowtie2.log ${create_unmapped} diff --git a/modules/nf-core/bowtie2/align/tests/cram_crai.config b/modules/nf-core/bowtie2/align/tests/cram_crai.config new file mode 100644 index 00000000000..03f1d5e511f --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/cram_crai.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_ALIGN { + ext.args2 = '--output-fmt cram --write-index' + } +} diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test b/modules/nf-core/bowtie2/align/tests/main.nf.test index a478d17b542..03aeaf9eef3 100644 --- a/modules/nf-core/bowtie2/align/tests/main.nf.test +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test @@ -6,9 +6,10 @@ nextflow_process { tag "modules" tag "modules_nfcore" tag "bowtie2" + tag "bowtie2/build" tag "bowtie2/align" - test("sarscov2 - fastq, index, false, false - bam") { + test("sarscov2 - fastq, index, fasta, false, false - bam") { setup { run("BOWTIE2_BUILD") { @@ -32,8 +33,9 @@ nextflow_process { file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] input[1] = BOWTIE2_BUILD.out.index - input[2] = false //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -42,7 +44,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, process.out.log, process.out.fastq, process.out.versions @@ -52,7 +54,7 @@ nextflow_process { } - test("sarscov2 - fastq, index, false, false - sam") { + test("sarscov2 - fastq, index, fasta, false, false - sam") { config "./sam.config" setup { @@ -77,8 +79,9 @@ nextflow_process { file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] input[1] = BOWTIE2_BUILD.out.index - input[2] = false //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -87,7 +90,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).readLines()[0..4], + file(process.out.sam[0][1]).readLines()[0..4], process.out.log, process.out.fastq, process.out.versions @@ -97,7 +100,7 @@ nextflow_process { } - test("sarscov2 - fastq, index, false, false - sam2") { + test("sarscov2 - fastq, index, fasta, false, false - sam2") { config "./sam2.config" setup { @@ -122,8 +125,9 @@ nextflow_process { file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] input[1] = BOWTIE2_BUILD.out.index - input[2] = false //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -132,7 +136,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).readLines()[0..4], + file(process.out.sam[0][1]).readLines()[0..4], process.out.log, process.out.fastq, process.out.versions @@ -142,7 +146,7 @@ nextflow_process { } - test("sarscov2 - fastq, index, false, true - bam") { + test("sarscov2 - fastq, index, fasta, false, true - bam") { setup { run("BOWTIE2_BUILD") { @@ -166,8 +170,9 @@ nextflow_process { file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] input[1] = BOWTIE2_BUILD.out.index - input[2] = false //save_unaligned - input[3] = true //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -176,7 +181,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, process.out.log, process.out.fastq, process.out.versions @@ -186,7 +191,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, false, false - bam") { + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - bam") { setup { run("BOWTIE2_BUILD") { @@ -213,8 +218,9 @@ nextflow_process { ] ] input[1] = BOWTIE2_BUILD.out.index - input[2] = false //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -223,7 +229,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, process.out.log, process.out.fastq, process.out.versions @@ -233,7 +239,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, false, true - bam") { + test("sarscov2 - [fastq1, fastq2], index, fasta, false, true - bam") { setup { run("BOWTIE2_BUILD") { @@ -260,8 +266,9 @@ nextflow_process { ] ] input[1] = BOWTIE2_BUILD.out.index - input[2] = false //save_unaligned - input[3] = true //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -270,7 +277,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, process.out.log, process.out.fastq, process.out.versions @@ -280,7 +287,7 @@ nextflow_process { } - test("sarscov2 - fastq, large_index, false, false - bam") { + test("sarscov2 - fastq, large_index, fasta, false, false - bam") { config "./large_index.config" setup { @@ -305,8 +312,9 @@ nextflow_process { file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] input[1] = BOWTIE2_BUILD.out.index - input[2] = false //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -315,7 +323,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, process.out.log, process.out.fastq, process.out.versions @@ -325,7 +333,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], large_index, false, false - bam") { + test("sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam") { config "./large_index.config" setup { @@ -353,8 +361,9 @@ nextflow_process { ] ] input[1] = BOWTIE2_BUILD.out.index - input[2] = false //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -363,7 +372,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, process.out.log, process.out.fastq, process.out.versions @@ -373,7 +382,7 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, true, false - bam") { + test("sarscov2 - [fastq1, fastq2], index, fasta, true, false - bam") { setup { run("BOWTIE2_BUILD") { @@ -400,8 +409,9 @@ nextflow_process { ] ] input[1] = BOWTIE2_BUILD.out.index - input[2] = true //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -410,7 +420,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, process.out.log, process.out.fastq, process.out.versions @@ -420,7 +430,7 @@ nextflow_process { } - test("sarscov2 - fastq, index, true, false - bam") { + test("sarscov2 - fastq, index, fasta, true, false - bam") { setup { run("BOWTIE2_BUILD") { @@ -444,8 +454,9 @@ nextflow_process { file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] input[1] = BOWTIE2_BUILD.out.index - input[2] = true //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -454,7 +465,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, process.out.log, process.out.fastq, process.out.versions @@ -465,7 +476,54 @@ nextflow_process { } - test("sarscov2 - [fastq1, fastq2], index, false, false - stub") { + test("sarscov2 - [fastq1, fastq2], index, fasta, true, true - cram") { + + config "./cram_crai.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = true //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.cram[0][1]).name, + file(process.out.crai[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - stub") { options "-stub" setup { @@ -493,8 +551,9 @@ nextflow_process { ] ] input[1] = BOWTIE2_BUILD.out.index - input[2] = false //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -503,7 +562,8 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, file(process.out.log[0][1]).name, process.out.fastq, process.out.versions @@ -513,7 +573,7 @@ nextflow_process { } - test("sarscov2 - fastq, index, true, false - stub") { + test("sarscov2 - fastq, index, fasta, true, false - stub") { options "-stub" setup { @@ -538,8 +598,9 @@ nextflow_process { file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] input[1] = BOWTIE2_BUILD.out.index - input[2] = true //save_unaligned - input[3] = false //sort + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort """ } } @@ -548,14 +609,15 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot( - file(process.out.aligned[0][1]).name, + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, file(process.out.log[0][1]).name, - file(process.out.fastq[0][1]).name, + process.out.fastq, process.out.versions ).match() } ) } } - + } diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test.snap b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap index 883dc7ecba1..028e7da6898 100644 --- a/modules/nf-core/bowtie2/align/tests/main.nf.test.snap +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap @@ -1,34 +1,38 @@ { - "sarscov2 - fastq, index, false, false - sam2": { + "sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam": { "content": [ - [ - "ERR5069949.2151832\t16\tMT192765.1\t17453\t42\t150M\t*\t0\t0\tACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGA\tAAAA