From 2332cfe23f03279cbfdcb3b2404fdde163495fec Mon Sep 17 00:00:00 2001 From: Jeronimo Vazquez Date: Mon, 19 May 2025 20:54:46 +0000 Subject: [PATCH 1/2] fix starfusion local module from rnafusion to migrate it to nf-core/modules --- .../nf-core/starfusion/build/environment.yml | 2 +- modules/nf-core/starfusion/build/main.nf | 23 +++++- modules/nf-core/starfusion/build/meta.yml | 11 ++- .../starfusion/build/tests/main.nf.test | 10 ++- .../starfusion/build/tests/main.nf.test.snap | 74 +++++++++---------- 5 files changed, 74 insertions(+), 46 deletions(-) diff --git a/modules/nf-core/starfusion/build/environment.yml b/modules/nf-core/starfusion/build/environment.yml index ef7f9316061..28e0c4df6f3 100644 --- a/modules/nf-core/starfusion/build/environment.yml +++ b/modules/nf-core/starfusion/build/environment.yml @@ -5,4 +5,4 @@ dependencies: - bioconda::dfam=3.7 - bioconda::hmmer=3.4 - bioconda::minimap2=2.28 - - bioconda::star-fusion=1.14.0 + - bioconda::star-fusion=1.15.0 diff --git a/modules/nf-core/starfusion/build/main.nf b/modules/nf-core/starfusion/build/main.nf index a687cbd7bcb..eb1a7072a29 100644 --- a/modules/nf-core/starfusion/build/main.nf +++ b/modules/nf-core/starfusion/build/main.nf @@ -4,14 +4,16 @@ process STARFUSION_BUILD { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/be/bed86145102fdf7e381e1a506a4723676f98b4bbe1db5085d02213cef18525c9/data' : - 'community.wave.seqera.io/library/dfam_hmmer_minimap2_star-fusion:aa3a8e3951498552'}" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b0/b0a58b9d30f9c72b22135f85746e10596d568c40a7d9634b13e0a0749cacd21b/data' : + 'community.wave.seqera.io/library/dfam_hmmer_minimap2_star-fusion:c2bc5374f142ac93'}" input: tuple val(meta), path(fasta) tuple val(meta2), path(gtf) path fusion_annot_lib val dfam_species + val dfam_version + val pfam_version output: tuple val(meta), path("${prefix}_genome_lib_build_dir"), emit: reference @@ -24,18 +26,30 @@ process STARFUSION_BUILD { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" """ + wget http://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam${pfam_version}/Pfam-A.hmm.gz --no-check-certificate + wget https://www.dfam.org/releases/Dfam_${dfam_version}/infrastructure/dfamscan/${dfam_species}_dfam.hmm --no-check-certificate + wget https://www.dfam.org/releases/Dfam_${dfam_version}/infrastructure/dfamscan/${dfam_species}_dfam.hmm.h3f --no-check-certificate + wget https://www.dfam.org/releases/Dfam_${dfam_version}/infrastructure/dfamscan/${dfam_species}_dfam.hmm.h3i --no-check-certificate + wget https://www.dfam.org/releases/Dfam_${dfam_version}/infrastructure/dfamscan/${dfam_species}_dfam.hmm.h3m --no-check-certificate + wget https://www.dfam.org/releases/Dfam_${dfam_version}/infrastructure/dfamscan/${dfam_species}_dfam.hmm.h3p --no-check-certificate + gunzip Pfam-A.hmm.gz && hmmpress Pfam-A.hmm + wget https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/AnnotFilterRule.pm -O AnnotFilterRule.pm --no-check-certificate + + prep_genome_lib.pl \\ --genome_fa $fasta \\ --gtf $gtf \\ - --dfam_db ${dfam_species} \\ - --pfam_db current \\ + --dfam_db ${dfam_species}_dfam.hmm \\ + --pfam_db Pfam-A.hmm \\ --fusion_annot_lib $fusion_annot_lib \\ + --annot_filter_rule AnnotFilterRule.pm \\ --CPU $task.cpus \\ --output_dir ${prefix}_genome_lib_build_dir \\ ${args} cat <<-END_VERSIONS > versions.yml "${task.process}": + wget: \$(wget --version | head -1 | cut -d ' ' -f 3) STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') END_VERSIONS """ @@ -128,6 +142,7 @@ process STARFUSION_BUILD { cat <<-END_VERSIONS > versions.yml "${task.process}": + wget: \$(wget --version | head -1 | cut -d ' ' -f 3) STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //') END_VERSIONS """ diff --git a/modules/nf-core/starfusion/build/meta.yml b/modules/nf-core/starfusion/build/meta.yml index c6635b02e8d..1af1adfbb79 100644 --- a/modules/nf-core/starfusion/build/meta.yml +++ b/modules/nf-core/starfusion/build/meta.yml @@ -20,7 +20,7 @@ input: type: map description: Metadata map required: true - - fasta: + - - fasta: type: file description: Input FASTA file pattern: "*.{fa,fasta}" @@ -31,7 +31,7 @@ input: type: map description: Second metadata map required: true - - gtf: + - - gtf: type: file description: Input GTF (Gene Transfer Format) file pattern: "*.gtf" @@ -47,6 +47,13 @@ input: - - dfam_species: type: string description: Dfam species name + - - dfam_version: + type: string + description: DNA transposable element database version, required for repeat masking. Values such as '3.4' are accepted (will automatically pull the resources from dfam). + - - pfam_version: + type: string + description: Protein families database version, to get domain information. Values such as '37.4' are accepted (will automatically pull the resources from Pfam). + output: - reference: - meta: diff --git a/modules/nf-core/starfusion/build/tests/main.nf.test b/modules/nf-core/starfusion/build/tests/main.nf.test index bbaab0ae59d..13f8da66f73 100644 --- a/modules/nf-core/starfusion/build/tests/main.nf.test +++ b/modules/nf-core/starfusion/build/tests/main.nf.test @@ -22,7 +22,10 @@ nextflow_process { ] input [2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/CTAT_HumanFusionLib.mini.dat.gz') - input [3] = "human" + input [3] = "homo_sapiens" + input [4] = "3.8" + input [5] = "37.4" + """ } } @@ -124,7 +127,10 @@ nextflow_process { ] input [2] = file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz") - input [3] = "human" + input [3] = "homo_sapiens" + input [4] = "3.8" + input [5] = "37.4" + """ } } diff --git a/modules/nf-core/starfusion/build/tests/main.nf.test.snap b/modules/nf-core/starfusion/build/tests/main.nf.test.snap index 473c7aa73c6..19cb0902dfc 100644 --- a/modules/nf-core/starfusion/build/tests/main.nf.test.snap +++ b/modules/nf-core/starfusion/build/tests/main.nf.test.snap @@ -1,7 +1,7 @@ { "STARFUSION_BUILD - human - minigenome": { "content": [ - "AnnotFilterRule.pm:md5,5391fcc58d9c71cd1f0e45668c5ec597", + "AnnotFilterRule.pm:md5,d41d8cd98f00b204e9800998ecf8427e", "blast_pairs.dat.gz", "blast_pairs.idx", "annotfiltrule_cp.ok", @@ -27,7 +27,7 @@ "trans.blast.dat.cp.ok", "trans.blast.dat.index.ok", "validate_ctat_genome_lib.ok", - "fusion_annot_lib.gz:md5,23d82a5da81f91ca4e1ecd6481992a12", + "fusion_annot_lib.gz:md5,68b329da9893e34099c7d8ad5cb9c940", "fusion_annot_lib.idx", "pfam_domains.dbm", "PFAM.domtblout.dat.gz", @@ -36,53 +36,53 @@ "ref_annot.cds", "ref_annot.cdsplus.fa", "ref_annot.cdsplus.fa.idx", - "ref_annot.gtf:md5,5ce8afe99ef3940a877a04caeacf9181", + "ref_annot.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", "ref_annot.gtf.gene_spans", - "ref_annot.gtf.mini.sortu:md5,1d29ccecdbb7b40a99c84a02d6c2c1be", - "ref_annot.gtf.mm2.splice.bed:md5,340585ea1843bf06bf555575ddecf28c", + "ref_annot.gtf.mini.sortu:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mm2.splice.bed:md5,d41d8cd98f00b204e9800998ecf8427e", "ref_annot.pep", "ref_annot.prot_info.dbm", - "ref_genome.fa:md5,ad699c56ed38566c7d3e9579486b1706", - "ref_genome.fa.fai:md5,e3f74a27219b33ae80fd5de5cbeaf32b", - "ref_genome.fa.mm2:md5,ce50979ea284748eb9f84ae88cfd930e", - "ref_genome.fa.ndb:md5,6ea574753b557610f62f6e4ab79e19f5", - "ref_genome.fa.nhr:md5,50f28dae71683c4394bfaf94a1ef4392", + "ref_genome.fa:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.fai:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.mm2:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ndb:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nhr:md5,d41d8cd98f00b204e9800998ecf8427e", "ref_genome.fa.nin", "ref_genome.fa.njs", - "ref_genome.fa.not:md5,1e53e9d08f1d23af0299cfa87478a7bb", - "ref_genome.fa.nsq:md5,d2361e7871ce4cf51181c112a48f191b", - "ref_genome.fa.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb", - "ref_genome.fa.nto:md5,33cdeccccebe80329f1fdbee7f5874cb", + "ref_genome.fa.not:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nsq:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.ntf:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.nto:md5,d41d8cd98f00b204e9800998ecf8427e", "ref_genome.fa.star.idx", "build.ok", - "chrLength.txt:md5,e02cd536b7281b894246863b160d5d06", - "chrNameLength.txt:md5,07a67d7ac441d7d30d80840b0927e717", - "chrName.txt:md5,f4d0d6595f423084e6b9472e40dfe6e8", - "chrStart.txt:md5,e2031239a74fe5ee9051e9364e4f608a", - "exonGeTrInfo.tab:md5,3c35618d07a8e35a0f9108699fcdda42", - "exonInfo.tab:md5,bcbb3f32fa31fe504cc737f337ad341c", - "geneInfo.tab:md5,db5db4b6e003904e9908fce7c05f0125", - "Genome:md5,9e3efdd0901cabb5a2d589664a63b372", + "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", "genomeParameters.txt", "Log.out", - "SA:md5,7dd9083264be9c6a2194d990bc10d237", - "SAindex:md5,ac4711df685109e04356db9e9cb9fb7f", - "sjdbInfo.txt:md5,e4cc1bbf8bd687cfc3d7c2c702e6def7", - "sjdbList.fromGTF.out.tab:md5,8f3e8604b00d4067e4eb80aa476a8113", - "sjdbList.out.tab:md5,5d78dd49d5db24ca2c056b7ebe5c2059", - "transcriptInfo.tab:md5,b758c0ccaddcf0453bab5905b3cec4a2", - "trans.blast.align_coords.align_coords.dat:md5,9f6b7a75aea03a9671190be25ecdd4c2", + "SA:md5,d41d8cd98f00b204e9800998ecf8427e", + "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", + "trans.blast.align_coords.align_coords.dat:md5,d41d8cd98f00b204e9800998ecf8427e", "trans.blast.align_coords.align_coords.dbm", - "trans.blast.dat.gz:md5,85ba5ea96c566f751ad83a3e4b8ab128", + "trans.blast.dat.gz:md5,68b329da9893e34099c7d8ad5cb9c940", [ - "versions.yml:md5,4a6ed47cb345f2a5af4d9f12b7918cdb" + "versions.yml:md5,99533491facb961016dafc1d45a8101e" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nextflow": "25.04.2" }, - "timestamp": "2025-03-31T19:58:54.583793842" + "timestamp": "2025-05-19T20:48:57.135915873" }, "STARFUSION_BUILD - human - minigenome - stub": { "content": [ @@ -173,7 +173,7 @@ ] ], "1": [ - "versions.yml:md5,4a6ed47cb345f2a5af4d9f12b7918cdb" + "versions.yml:md5,99533491facb961016dafc1d45a8101e" ], "reference": [ [ @@ -261,14 +261,14 @@ ] ], "versions": [ - "versions.yml:md5,4a6ed47cb345f2a5af4d9f12b7918cdb" + "versions.yml:md5,99533491facb961016dafc1d45a8101e" ] } ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nextflow": "25.04.2" }, - "timestamp": "2025-03-28T23:09:01.034159236" + "timestamp": "2025-05-19T19:52:36.457569412" } } \ No newline at end of file From e3138c8fd922bb8fff569fe0763a786c3e778468 Mon Sep 17 00:00:00 2001 From: Jeronimo Vazquez Date: Tue, 20 May 2025 17:41:47 +0000 Subject: [PATCH 2/2] fix nf-core modules lint errors --- modules/nf-core/starfusion/build/meta.yml | 12 ++-- .../starfusion/build/tests/main.nf.test.snap | 62 +++++++++---------- 2 files changed, 38 insertions(+), 36 deletions(-) diff --git a/modules/nf-core/starfusion/build/meta.yml b/modules/nf-core/starfusion/build/meta.yml index 1af1adfbb79..699ba561275 100644 --- a/modules/nf-core/starfusion/build/meta.yml +++ b/modules/nf-core/starfusion/build/meta.yml @@ -20,7 +20,7 @@ input: type: map description: Metadata map required: true - - - fasta: + - fasta: type: file description: Input FASTA file pattern: "*.{fa,fasta}" @@ -31,7 +31,7 @@ input: type: map description: Second metadata map required: true - - - gtf: + - gtf: type: file description: Input GTF (Gene Transfer Format) file pattern: "*.gtf" @@ -49,11 +49,13 @@ input: description: Dfam species name - - dfam_version: type: string - description: DNA transposable element database version, required for repeat masking. Values such as '3.4' are accepted (will automatically pull the resources from dfam). + description: DNA transposable element database version, required for repeat + masking. Values such as '3.4' are accepted (will automatically pull the resources + from dfam). - - pfam_version: type: string - description: Protein families database version, to get domain information. Values such as '37.4' are accepted (will automatically pull the resources from Pfam). - + description: Protein families database version, to get domain information. Values + such as '37.4' are accepted (will automatically pull the resources from Pfam). output: - reference: - meta: diff --git a/modules/nf-core/starfusion/build/tests/main.nf.test.snap b/modules/nf-core/starfusion/build/tests/main.nf.test.snap index 19cb0902dfc..cca79aae099 100644 --- a/modules/nf-core/starfusion/build/tests/main.nf.test.snap +++ b/modules/nf-core/starfusion/build/tests/main.nf.test.snap @@ -1,7 +1,7 @@ { "STARFUSION_BUILD - human - minigenome": { "content": [ - "AnnotFilterRule.pm:md5,d41d8cd98f00b204e9800998ecf8427e", + "AnnotFilterRule.pm:md5,f94966013cd0df9624a6dda0b75fefa0", "blast_pairs.dat.gz", "blast_pairs.idx", "annotfiltrule_cp.ok", @@ -27,7 +27,7 @@ "trans.blast.dat.cp.ok", "trans.blast.dat.index.ok", "validate_ctat_genome_lib.ok", - "fusion_annot_lib.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "fusion_annot_lib.gz:md5,23d82a5da81f91ca4e1ecd6481992a12", "fusion_annot_lib.idx", "pfam_domains.dbm", "PFAM.domtblout.dat.gz", @@ -36,44 +36,44 @@ "ref_annot.cds", "ref_annot.cdsplus.fa", "ref_annot.cdsplus.fa.idx", - "ref_annot.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf:md5,5ce8afe99ef3940a877a04caeacf9181", "ref_annot.gtf.gene_spans", - "ref_annot.gtf.mini.sortu:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_annot.gtf.mm2.splice.bed:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_annot.gtf.mini.sortu:md5,1d29ccecdbb7b40a99c84a02d6c2c1be", + "ref_annot.gtf.mm2.splice.bed:md5,340585ea1843bf06bf555575ddecf28c", "ref_annot.pep", "ref_annot.prot_info.dbm", - "ref_genome.fa:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.fai:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.mm2:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.ndb:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.nhr:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa:md5,ad699c56ed38566c7d3e9579486b1706", + "ref_genome.fa.fai:md5,e3f74a27219b33ae80fd5de5cbeaf32b", + "ref_genome.fa.mm2:md5,ce50979ea284748eb9f84ae88cfd930e", + "ref_genome.fa.ndb:md5,6ea574753b557610f62f6e4ab79e19f5", + "ref_genome.fa.nhr:md5,50f28dae71683c4394bfaf94a1ef4392", "ref_genome.fa.nin", "ref_genome.fa.njs", - "ref_genome.fa.not:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.nsq:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.ntf:md5,d41d8cd98f00b204e9800998ecf8427e", - "ref_genome.fa.nto:md5,d41d8cd98f00b204e9800998ecf8427e", + "ref_genome.fa.not:md5,1e53e9d08f1d23af0299cfa87478a7bb", + "ref_genome.fa.nsq:md5,d2361e7871ce4cf51181c112a48f191b", + "ref_genome.fa.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb", + "ref_genome.fa.nto:md5,33cdeccccebe80329f1fdbee7f5874cb", "ref_genome.fa.star.idx", "build.ok", - "chrLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "chrNameLength.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "chrName.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "chrStart.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "exonGeTrInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", - "exonInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", - "geneInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", - "Genome:md5,d41d8cd98f00b204e9800998ecf8427e", + "chrLength.txt:md5,e02cd536b7281b894246863b160d5d06", + "chrNameLength.txt:md5,07a67d7ac441d7d30d80840b0927e717", + "chrName.txt:md5,f4d0d6595f423084e6b9472e40dfe6e8", + "chrStart.txt:md5,e2031239a74fe5ee9051e9364e4f608a", + "exonGeTrInfo.tab:md5,3c35618d07a8e35a0f9108699fcdda42", + "exonInfo.tab:md5,bcbb3f32fa31fe504cc737f337ad341c", + "geneInfo.tab:md5,db5db4b6e003904e9908fce7c05f0125", + "Genome:md5,9e3efdd0901cabb5a2d589664a63b372", "genomeParameters.txt", "Log.out", - "SA:md5,d41d8cd98f00b204e9800998ecf8427e", - "SAindex:md5,d41d8cd98f00b204e9800998ecf8427e", - "sjdbInfo.txt:md5,d41d8cd98f00b204e9800998ecf8427e", - "sjdbList.fromGTF.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", - "sjdbList.out.tab:md5,d41d8cd98f00b204e9800998ecf8427e", - "transcriptInfo.tab:md5,d41d8cd98f00b204e9800998ecf8427e", - "trans.blast.align_coords.align_coords.dat:md5,d41d8cd98f00b204e9800998ecf8427e", + "SA:md5,7dd9083264be9c6a2194d990bc10d237", + "SAindex:md5,ac4711df685109e04356db9e9cb9fb7f", + "sjdbInfo.txt:md5,e4cc1bbf8bd687cfc3d7c2c702e6def7", + "sjdbList.fromGTF.out.tab:md5,8f3e8604b00d4067e4eb80aa476a8113", + "sjdbList.out.tab:md5,5d78dd49d5db24ca2c056b7ebe5c2059", + "transcriptInfo.tab:md5,b758c0ccaddcf0453bab5905b3cec4a2", + "trans.blast.align_coords.align_coords.dat:md5,9f6b7a75aea03a9671190be25ecdd4c2", "trans.blast.align_coords.align_coords.dbm", - "trans.blast.dat.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "trans.blast.dat.gz:md5,85ba5ea96c566f751ad83a3e4b8ab128", [ "versions.yml:md5,99533491facb961016dafc1d45a8101e" ] @@ -82,7 +82,7 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-05-19T20:48:57.135915873" + "timestamp": "2025-05-20T17:11:52.797058494" }, "STARFUSION_BUILD - human - minigenome - stub": { "content": [