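Fixes: rename `params.dbs` to `params.databases`, simplify CHECKV inputs/outputs, add `set_name` to plot process tags, correct boolean types in the schema, and remove debug `.view()` calls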

EBI-Metagenomics · Oct 7, 2024 · cd0039a
1 parent 0379788
commit cd0039a
Show file tree

Hide file tree

Showing 10 changed files with 34 additions and 71 deletions.
diff --git a/configs/modules.config b/configs/modules.config
@@ -171,21 +171,6 @@ process {
                 failOnError: false,
                 pattern: "*_quality_summary.tsv"
             ],
-            [
-                path: "${params.output}",
-                saveAs: {
-                    filename -> {
-                        if ( filename.equals('versions.yml') ) {
-                            return null;
-                        }
-                        def output_file = new File(filename);
-                        return "${meta.id}/${params.checkvdir}/${output_file.name}";
-                    }
-                },
-                mode: params.publish_dir_mode,
-                failOnError: false,
-                pattern: "*.tsv"
-            ],
         ]
     }
 
@@ -329,12 +314,12 @@ process {
                             return null;
                         }
                         def output_file = new File(filename);
-                        return "${meta.id}/${params.hmmerdir}/${params.dbs}/${output_file.name}";
+                        return "${meta.id}/${params.hmmerdir}/${params.databases}/${output_file.name}";
                     }
                 },
                 mode: params.publish_dir_mode,
                 failOnError: false,
-                pattern: "*_${params.dbs}_hmmscan.tbl"
+                pattern: "*_${params.databases}_hmmscan.tbl"
             ]
         ]
     }

diff --git a/modules/local/checkv/main.nf b/modules/local/checkv/main.nf
@@ -4,11 +4,11 @@ process CHECKV {
     container 'quay.io/microbiome-informatics/checkv:0.8.1__1'
 
     input:
-        tuple val(meta), val(confidence_set_name), path(fasta), path(contigs)
-        file(database)
+        tuple val(meta), val(confidence_set_name), path(fasta)
+        path(database)
 
     output:
-        tuple val(meta), val(confidence_set_name), path("${confidence_set_name}_quality_summary.tsv"), path("${confidence_set_name}/")
+        tuple val(meta), val(confidence_set_name), path("${confidence_set_name}_quality_summary.tsv")
 
     script:
 

diff --git a/modules/local/chromomap/main.nf b/modules/local/chromomap/main.nf
@@ -1,6 +1,6 @@
 process GENERATE_CHROMOMAP_TABLE {
     label 'process_single'
-    tag "${meta.id}"    
+    tag "${meta.id} ${set_name}"    
     container 'quay.io/microbiome-informatics/bioruby:2.0.1'
 
     input:
@@ -30,7 +30,7 @@ process GENERATE_CHROMOMAP_TABLE {
 
 process CHROMOMAP {
     label 'process_low'
-    tag "${meta.id}"
+    tag "${meta.id} ${set_name}"
     container 'quay.io/microbiome-informatics/r_chromomap:0.3'
 
     input:

diff --git a/modules/local/help.nf b/modules/local/help.nf
@@ -81,7 +81,7 @@ def helpMSG() {
 
     ${c_yellow}HPC computing:${c_reset}
     Especially for execution of the workflow on a HPC (LSF, SLURM) adjust the following parameters if needed:
-    --databases               defines the path where databases are stored [default: $params.dbs]
+    --databases               defines the path where databases are stored [default: $params.databases]
     --workdir                 defines the path where nextflow writes tmp files [default: $params.workdir]
     --singularity_cachedir    defines the path where images (singularity) are cached [default: $params.singularity_cachedir] 
 

diff --git a/modules/local/hmmscan/main.nf b/modules/local/hmmscan/main.nf
@@ -9,20 +9,20 @@ process HMMSCAN {
       path(db)
 
     output:
-      tuple val(meta), val(set_name), path("${set_name}_${params.dbs}_hmmscan.tbl"), path(faa)
+      tuple val(meta), val(set_name), path("${set_name}_${params.databases}_hmmscan.tbl"), path(faa)
 
     script:
     """
-    if [[ ${params.dbs} == "viphogs" ]]; then
+    if [[ ${params.databases} == "viphogs" ]]; then
       if [[ ${params.version} == "v1" ]]; then
-        hmmscan --cpu ${task.cpus} --noali -E "0.001" --domtblout ${set_name}_${params.dbs}_hmmscan.tbl ${db}/${db}.hmm ${faa}
+        hmmscan --cpu ${task.cpus} --noali -E "0.001" --domtblout ${set_name}_${params.databases}_hmmscan.tbl ${db}/${db}.hmm ${faa}
       else
-        hmmscan --cpu ${task.cpus} --noali --cut_ga --domtblout ${set_name}_${params.dbs}_hmmscan_cutga.tbl ${db}/${db}.hmm ${faa}
+        hmmscan --cpu ${task.cpus} --noali --cut_ga --domtblout ${set_name}_${params.databases}_hmmscan_cutga.tbl ${db}/${db}.hmm ${faa}
         #filter evalue for models that dont have any GA cutoff
-        awk '{if(\$1 ~ /^#/){print \$0}else{if(\$7<0.001){print \$0}}}' ${set_name}_${params.dbs}_hmmscan_cutga.tbl > ${set_name}_${params.db}_hmmscan.tbl
+        awk '{if(\$1 ~ /^#/){print \$0}else{if(\$7<0.001){print \$0}}}' ${set_name}_${params.databases}_hmmscan_cutga.tbl > ${set_name}_${params.db}_hmmscan.tbl
       fi
     else
-      hmmscan --cpu ${task.cpus} --noali -E "0.001" --domtblout ${set_name}_${params.dbs}_hmmscan.tbl ${db}/${db}.hmm ${faa}
+      hmmscan --cpu ${task.cpus} --noali -E "0.001" --domtblout ${set_name}_${params.databases}_hmmscan.tbl ${db}/${db}.hmm ${faa}
     fi
     """
 }
diff --git a/modules/local/sankey/main.nf b/modules/local/sankey/main.nf
@@ -1,6 +1,6 @@
 process GENERATE_SANKEY_TABLE {
     label 'process_low'
-    tag "${meta.id}"    
+    tag "${meta.id} ${set_name}"    
     container 'quay.io/microbiome-informatics/bioruby:2.0.1'
 
     input:
@@ -23,7 +23,7 @@ process GENERATE_SANKEY_TABLE {
 process SANKEY {
 
     label 'process_low'
-    tag "${meta.id}"
+    tag "${meta.id} ${set_name}"
     container 'quay.io/microbiome-informatics/sankeyd3:0.12.3'
 
     input:

diff --git a/nextflow.config b/nextflow.config
@@ -63,7 +63,7 @@ params {
     finaldir             = '08-final'
 
     // location for autodownload data like databases
-    dbs                  = 'nextflow-autodownload-databases'
+    databases            = 'nextflow-autodownload-databases'
 
     // optional profile configurations, mostly necessary for HPC execution [lsf, slurm]
     workdir              = 'work'
@@ -96,14 +96,12 @@ profiles {
                	cpus = params.max_cores
         } 
         workDir = params.workdir
-        params.databases = params.dbs
         params.cloudProcess = false
         includeConfig 'configs/local.config'
     }
 
     lsf {
         workDir = params.workdir
-        params.databases = params.dbs
         executor {
             name = "lsf"
             queueSize = 200
@@ -114,7 +112,6 @@ profiles {
 
     slurm {
         workDir = params.workdir
-        params.databases = params.dbs
         executor {
             name = "slurm"
             queueSize = 200
@@ -150,7 +147,6 @@ profiles {
                	cpus = params.max_cores
         }
         workDir = params.workdir
-        params.databases = params.dbs
         params.cloudProcess = false
         includeConfig 'configs/local.config'
         docker { enabled = true }

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -148,19 +148,19 @@
             "description": "Input parameters",
             "properties": {
                 "virome": {
-                    "type": "string",
+                    "type": "boolean",
                     "description": "VirSorter parameter, set when running a data set mostly composed of viruses"
                 },
                 "chromomap": {
-                    "type": "string",
+                    "type": "boolean",
                     "description": "feature to activate chromomap plot"
                 },
                 "balloon": {
-                    "type": "string",
+                    "type": "boolean",
                     "description": "feature to activate balloon plot"
                 },
                 "onlyannotate": {
-                    "type": "string",
+                    "type": "boolean",
                     "description": "Only annotate the input FASTA (no virus prediction, only contig length filtering)"
                 },
                 "mashmap_len": {
@@ -169,7 +169,7 @@
                     "description": "Mashmap mapping segment length, shorter sequences will be ignored"
                 },
                 "mashmap": {
-                    "type": "string",
+                    "type": "boolean",
                     "description": "Map the viral contigs against the provided reference"
                 },
                 "evalue": {
@@ -189,7 +189,7 @@
                 },
                 "factor": {
                     "type": "string",
-                    "default": "/Users/kates/Desktop/EBI/MGnify/pipelines/emg-viral-pipeline/references/viphogs_cds_per_taxon_cummulative.csv",
+                    "default": "emg-viral-pipeline/references/viphogs_cds_per_taxon_cummulative.csv",
                     "description": "Path to file with viral assemblies metadata, including taxon-specific factors"
                 },
                 "sankey": {
@@ -292,7 +292,7 @@
             "fa_icon": "fas fa-dna",
             "description": "Nextflow arguments",
             "properties": {
-                "dbs": {
+                "databases": {
                     "type": "string",
                     "default": "nextflow-autodownload-databases",
                     "description": "directory path to databases"
@@ -307,11 +307,6 @@
                     "default": "singularity",
                     "description": "singularity folder"
                 },
-                "databases": {
-                    "type": "string",
-                    "default": "nextflow-autodownload-databases",
-                    "description": "directory path to databases"
-                },
                 "cloudProcess": {
                     "type": "boolean",
                     "description": "run on cloud"

diff --git a/subworkflows/local/annotate.nf b/subworkflows/local/annotate.nf
@@ -93,35 +93,21 @@ workflow ANNOTATE {
     }
 
     CHECKV(
-      predicted_contigs.join(contigs.map { meta, fasta -> fasta }),
-      checkv_db
+      predicted_contigs,
+      checkv_db.first()
     )
-
-    viphos_annotations = ANNOTATION.out.annotations.groupTuple().map{ 
-          meta, values -> {
-             def annotations = values.collect{it[1]};
-             return [meta, annotations] }                                         
-    }
-    taxonomy_annotations = ASSIGN.out.groupTuple().map{ 
-          meta, values -> {
-             def taxonomy = values.collect{it[1]};
-             return [meta, taxonomy] }                   
-    }
-    checkv_results = CHECKV.out.groupTuple().map{ 
-          meta, values -> {
-             def quality_summary = values.collect{it[1]};
-             return [meta, quality_summary] }                                              
-    }
+
+    viphos_annotations = ANNOTATION.out.annotations.map{meta, type, annotation -> [meta, annotation]}.groupTuple()
+    taxonomy_annotations = ASSIGN.out.map{meta, type, annotation -> [meta, annotation]}.groupTuple()
+    checkv_results = CHECKV.out.map{meta, type, quality -> [meta, quality]}.groupTuple()
 
     WRITE_GFF(
       contigs.join(viphos_annotations).join(taxonomy_annotations).join(checkv_results)
     )
 
-    chromomap_ch = Channel.empty()
     predicted_contigs_filtered = predicted_contigs.map { meta, set_name, fasta -> [set_name, meta, fasta] }
     plot_contig_map_filtered = PLOT_CONTIG_MAP.out.map { meta, set_name, dir, table -> [set_name, table] }
     chromomap_ch = predicted_contigs_filtered.join(plot_contig_map_filtered).map { set_name, assembly_name, fasta, tsv -> [assembly_name, set_name, fasta, tsv]}
-    chromomap_ch.view()
 
     emit:
     assign_output = ASSIGN.out

diff --git a/workflows/virify.nf b/workflows/virify.nf
@@ -34,7 +34,7 @@ if ( params.samplesheet ) {
     samplesheet = Channel.fromList(samplesheetToList(params.samplesheet, "./assets/schema_input.json"))
     input_ch = samplesheet.map(groupReads)
 }
-input_ch.view()
+
 // one sample of assembly
 if (params.fasta) { 
    input_ch = Channel.fromPath( params.fasta, checkIfExists: true)
@@ -113,11 +113,12 @@ workflow VIRIFY {
          DOWNLOAD_DATABASES.out.virfinder_db, 
          DOWNLOAD_DATABASES.out.pprmeta_git
       )
-      postprocess_input_ch = DETECT.out
+      // (meta, fasta, map)
+      postprocess_input_ch = DETECT.out.detect_output
     }
 
     // ----------- POSTPROCESS: restore fasta file
-    POSTPROCESS(postprocess_input_ch)
+    POSTPROCESS(postprocess_input_ch)  // out: (meta, type(HC/LC/PP), fasta)
 
     // ----------- ANNOTATE
     ANNOTATE(