From cd0039a2d5e9be0feae161a4b346f7c135fe8a9b Mon Sep 17 00:00:00 2001 From: Ekaterina Sakharova Date: Mon, 7 Oct 2024 14:02:23 +0100 Subject: [PATCH] fixes --- Review note: in modules/local/hmmscan/main.nf the --cut_ga branch now redirects the awk-filtered table to ${set_name}_${params.databases}_hmmscan.tbl (it previously used params.db), so the file it writes has the same name as the process's declared output and as the file written by the other two branches. configs/modules.config | 19 ++----------------- modules/local/checkv/main.nf | 6 +++--- modules/local/chromomap/main.nf | 4 ++-- modules/local/help.nf | 2 +- modules/local/hmmscan/main.nf | 12 ++++++------ modules/local/sankey/main.nf | 4 ++-- nextflow.config | 6 +----- nextflow_schema.json | 19 +++++++------------ subworkflows/local/annotate.nf | 26 ++++++-------------------- workflows/virify.nf | 7 ++++--- 10 files changed, 34 insertions(+), 71 deletions(-) diff --git a/configs/modules.config b/configs/modules.config index 1397346..09936f8 100644 --- a/configs/modules.config +++ b/configs/modules.config @@ -171,21 +171,6 @@ process { failOnError: false, pattern: "*_quality_summary.tsv" ], - [ - path: "${params.output}", - saveAs: { - filename -> { - if ( filename.equals('versions.yml') ) { - return null; - } - def output_file = new File(filename); - return "${meta.id}/${params.checkvdir}/${output_file.name}"; - } - }, - mode: params.publish_dir_mode, - failOnError: false, - pattern: "*.tsv" - ], ] } @@ -329,12 +314,12 @@ process { return null; } def output_file = new File(filename); - return "${meta.id}/${params.hmmerdir}/${params.dbs}/${output_file.name}"; + return "${meta.id}/${params.hmmerdir}/${params.databases}/${output_file.name}"; } }, mode: params.publish_dir_mode, failOnError: false, - pattern: "*_${params.dbs}_hmmscan.tbl" + pattern: "*_${params.databases}_hmmscan.tbl" ] ] } diff --git a/modules/local/checkv/main.nf b/modules/local/checkv/main.nf index 8f33964..049ea4e 100644 --- a/modules/local/checkv/main.nf +++ b/modules/local/checkv/main.nf @@ -4,11 +4,11 @@ process CHECKV { container 'quay.io/microbiome-informatics/checkv:0.8.1__1' input: - tuple val(meta), val(confidence_set_name), path(fasta), path(contigs) - file(database) + tuple val(meta), val(confidence_set_name), path(fasta) + 
path(database) output: - tuple val(meta), val(confidence_set_name), path("${confidence_set_name}_quality_summary.tsv"), path("${confidence_set_name}/") + tuple val(meta), val(confidence_set_name), path("${confidence_set_name}_quality_summary.tsv") script: diff --git a/modules/local/chromomap/main.nf b/modules/local/chromomap/main.nf index a3d99a6..7fb4393 100644 --- a/modules/local/chromomap/main.nf +++ b/modules/local/chromomap/main.nf @@ -1,6 +1,6 @@ process GENERATE_CHROMOMAP_TABLE { label 'process_single' - tag "${meta.id}" + tag "${meta.id} ${set_name}" container 'quay.io/microbiome-informatics/bioruby:2.0.1' input: @@ -30,7 +30,7 @@ process GENERATE_CHROMOMAP_TABLE { process CHROMOMAP { label 'process_low' - tag "${meta.id}" + tag "${meta.id} ${set_name}" container 'quay.io/microbiome-informatics/r_chromomap:0.3' input: diff --git a/modules/local/help.nf b/modules/local/help.nf index 42d2739..28e3a97 100644 --- a/modules/local/help.nf +++ b/modules/local/help.nf @@ -81,7 +81,7 @@ def helpMSG() { ${c_yellow}HPC computing:${c_reset} Especially for execution of the workflow on a HPC (LSF, SLURM) adjust the following parameters if needed: - --databases defines the path where databases are stored [default: $params.dbs] + --databases defines the path where databases are stored [default: $params.databases] --workdir defines the path where nextflow writes tmp files [default: $params.workdir] --singularity_cachedir defines the path where images (singularity) are cached [default: $params.singularity_cachedir] diff --git a/modules/local/hmmscan/main.nf b/modules/local/hmmscan/main.nf index 651c1e3..c8e4473 100644 --- a/modules/local/hmmscan/main.nf +++ b/modules/local/hmmscan/main.nf @@ -9,20 +9,20 @@ process HMMSCAN { path(db) output: - tuple val(meta), val(set_name), path("${set_name}_${params.dbs}_hmmscan.tbl"), path(faa) + tuple val(meta), val(set_name), path("${set_name}_${params.databases}_hmmscan.tbl"), path(faa) script: """ - if [[ ${params.dbs} == "viphogs" ]]; 
then + if [[ ${params.databases} == "viphogs" ]]; then if [[ ${params.version} == "v1" ]]; then - hmmscan --cpu ${task.cpus} --noali -E "0.001" --domtblout ${set_name}_${params.dbs}_hmmscan.tbl ${db}/${db}.hmm ${faa} + hmmscan --cpu ${task.cpus} --noali -E "0.001" --domtblout ${set_name}_${params.databases}_hmmscan.tbl ${db}/${db}.hmm ${faa} else - hmmscan --cpu ${task.cpus} --noali --cut_ga --domtblout ${set_name}_${params.dbs}_hmmscan_cutga.tbl ${db}/${db}.hmm ${faa} + hmmscan --cpu ${task.cpus} --noali --cut_ga --domtblout ${set_name}_${params.databases}_hmmscan_cutga.tbl ${db}/${db}.hmm ${faa} #filter evalue for models that dont have any GA cutoff - awk '{if(\$1 ~ /^#/){print \$0}else{if(\$7<0.001){print \$0}}}' ${set_name}_${params.dbs}_hmmscan_cutga.tbl > ${set_name}_${params.db}_hmmscan.tbl + awk '{if(\$1 ~ /^#/){print \$0}else{if(\$7<0.001){print \$0}}}' ${set_name}_${params.databases}_hmmscan_cutga.tbl > ${set_name}_${params.databases}_hmmscan.tbl fi else - hmmscan --cpu ${task.cpus} --noali -E "0.001" --domtblout ${set_name}_${params.dbs}_hmmscan.tbl ${db}/${db}.hmm ${faa} + hmmscan --cpu ${task.cpus} --noali -E "0.001" --domtblout ${set_name}_${params.databases}_hmmscan.tbl ${db}/${db}.hmm ${faa} fi """ } diff --git a/modules/local/sankey/main.nf b/modules/local/sankey/main.nf index d81c875..0bc7171 100644 --- a/modules/local/sankey/main.nf +++ b/modules/local/sankey/main.nf @@ -1,6 +1,6 @@ process GENERATE_SANKEY_TABLE { label 'process_low' - tag "${meta.id}" + tag "${meta.id} ${set_name}" container 'quay.io/microbiome-informatics/bioruby:2.0.1' input: @@ -23,7 +23,7 @@ process GENERATE_SANKEY_TABLE { process SANKEY { label 'process_low' - tag "${meta.id}" + tag "${meta.id} ${set_name}" container 'quay.io/microbiome-informatics/sankeyd3:0.12.3' input: diff --git a/nextflow.config b/nextflow.config index 559e7ef..8a08d30 100755 --- a/nextflow.config +++ b/nextflow.config @@ -63,7 +63,7 @@ params { finaldir = '08-final' // location for autodownload data like 
databases - dbs = 'nextflow-autodownload-databases' + databases = 'nextflow-autodownload-databases' // optional profile configurations, mostly necessary for HPC execution [lsf, slurm] workdir = 'work' @@ -96,14 +96,12 @@ profiles { cpus = params.max_cores } workDir = params.workdir - params.databases = params.dbs params.cloudProcess = false includeConfig 'configs/local.config' } lsf { workDir = params.workdir - params.databases = params.dbs executor { name = "lsf" queueSize = 200 @@ -114,7 +112,6 @@ profiles { slurm { workDir = params.workdir - params.databases = params.dbs executor { name = "slurm" queueSize = 200 @@ -150,7 +147,6 @@ profiles { cpus = params.max_cores } workDir = params.workdir - params.databases = params.dbs params.cloudProcess = false includeConfig 'configs/local.config' docker { enabled = true } diff --git a/nextflow_schema.json b/nextflow_schema.json index 12b5344..3e400e2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -148,19 +148,19 @@ "description": "Input parameters", "properties": { "virome": { - "type": "string", + "type": "boolean", "description": "VirSorter parameter, set when running a data set mostly composed of viruses" }, "chromomap": { - "type": "string", + "type": "boolean", "description": "feature to activate chromomap plot" }, "balloon": { - "type": "string", + "type": "boolean", "description": "feature to activate balloon plot" }, "onlyannotate": { - "type": "string", + "type": "boolean", "description": "Only annotate the input FASTA (no virus prediction, only contig length filtering)" }, "mashmap_len": { @@ -169,7 +169,7 @@ "description": "Mashmap mapping segment length, shorter sequences will be ignored" }, "mashmap": { - "type": "string", + "type": "boolean", "description": "Map the viral contigs against the provided reference" }, "evalue": { @@ -189,7 +189,7 @@ }, "factor": { "type": "string", - "default": 
"/Users/kates/Desktop/EBI/MGnify/pipelines/emg-viral-pipeline/references/viphogs_cds_per_taxon_cummulative.csv", + "default": "emg-viral-pipeline/references/viphogs_cds_per_taxon_cummulative.csv", "description": "Path to file with viral assemblies metadata, including taxon-specific factors" }, "sankey": { @@ -292,7 +292,7 @@ "fa_icon": "fas fa-dna", "description": "Nextflow arguments", "properties": { - "dbs": { + "databases": { "type": "string", "default": "nextflow-autodownload-databases", "description": "directory path to databases" @@ -307,11 +307,6 @@ "default": "singularity", "description": "singularity folder" }, - "databases": { - "type": "string", - "default": "nextflow-autodownload-databases", - "description": "directory path to databases" - }, "cloudProcess": { "type": "boolean", "description": "run on cloud" diff --git a/subworkflows/local/annotate.nf b/subworkflows/local/annotate.nf index ec93263..6c672ce 100644 --- a/subworkflows/local/annotate.nf +++ b/subworkflows/local/annotate.nf @@ -93,35 +93,21 @@ workflow ANNOTATE { } CHECKV( - predicted_contigs.join(contigs.map { meta, fasta -> fasta }), - checkv_db + predicted_contigs, + checkv_db.first() ) - - viphos_annotations = ANNOTATION.out.annotations.groupTuple().map{ - meta, values -> { - def annotations = values.collect{it[1]}; - return [meta, annotations] } - } - taxonomy_annotations = ASSIGN.out.groupTuple().map{ - meta, values -> { - def taxonomy = values.collect{it[1]}; - return [meta, taxonomy] } - } - checkv_results = CHECKV.out.groupTuple().map{ - meta, values -> { - def quality_summary = values.collect{it[1]}; - return [meta, quality_summary] } - } + + viphos_annotations = ANNOTATION.out.annotations.map{meta, type, annotation -> [meta, annotation]}.groupTuple() + taxonomy_annotations = ASSIGN.out.map{meta, type, annotation -> [meta, annotation]}.groupTuple() + checkv_results = CHECKV.out.map{meta, type, quality -> [meta, quality]}.groupTuple() WRITE_GFF( 
contigs.join(viphos_annotations).join(taxonomy_annotations).join(checkv_results) ) - chromomap_ch = Channel.empty() predicted_contigs_filtered = predicted_contigs.map { meta, set_name, fasta -> [set_name, meta, fasta] } plot_contig_map_filtered = PLOT_CONTIG_MAP.out.map { meta, set_name, dir, table -> [set_name, table] } chromomap_ch = predicted_contigs_filtered.join(plot_contig_map_filtered).map { set_name, assembly_name, fasta, tsv -> [assembly_name, set_name, fasta, tsv]} - chromomap_ch.view() emit: assign_output = ASSIGN.out diff --git a/workflows/virify.nf b/workflows/virify.nf index e687440..3344c2b 100755 --- a/workflows/virify.nf +++ b/workflows/virify.nf @@ -34,7 +34,7 @@ if ( params.samplesheet ) { samplesheet = Channel.fromList(samplesheetToList(params.samplesheet, "./assets/schema_input.json")) input_ch = samplesheet.map(groupReads) } -input_ch.view() + // one sample of assembly if (params.fasta) { input_ch = Channel.fromPath( params.fasta, checkIfExists: true) @@ -113,11 +113,12 @@ workflow VIRIFY { DOWNLOAD_DATABASES.out.virfinder_db, DOWNLOAD_DATABASES.out.pprmeta_git ) - postprocess_input_ch = DETECT.out + // (meta, fasta, map) + postprocess_input_ch = DETECT.out.detect_output } // ----------- POSTPROCESS: restore fasta file - POSTPROCESS(postprocess_input_ch) + POSTPROCESS(postprocess_input_ch) // out: (meta, type(HC/LC/PP), fasta) // ----------- ANNOTATE ANNOTATE(