Skip to content

Commit

Permalink
fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
KateSakharova committed Oct 7, 2024
1 parent 0379788 commit cd0039a
Show file tree
Hide file tree
Showing 10 changed files with 34 additions and 71 deletions.
19 changes: 2 additions & 17 deletions configs/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -171,21 +171,6 @@ process {
failOnError: false,
pattern: "*_quality_summary.tsv"
],
[
path: "${params.output}",
saveAs: {
filename -> {
if ( filename.equals('versions.yml') ) {
return null;
}
def output_file = new File(filename);
return "${meta.id}/${params.checkvdir}/${output_file.name}";
}
},
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*.tsv"
],
]
}

Expand Down Expand Up @@ -329,12 +314,12 @@ process {
return null;
}
def output_file = new File(filename);
return "${meta.id}/${params.hmmerdir}/${params.dbs}/${output_file.name}";
return "${meta.id}/${params.hmmerdir}/${params.databases}/${output_file.name}";
}
},
mode: params.publish_dir_mode,
failOnError: false,
pattern: "*_${params.dbs}_hmmscan.tbl"
pattern: "*_${params.databases}_hmmscan.tbl"
]
]
}
Expand Down
6 changes: 3 additions & 3 deletions modules/local/checkv/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ process CHECKV {
container 'quay.io/microbiome-informatics/checkv:0.8.1__1'

input:
tuple val(meta), val(confidence_set_name), path(fasta), path(contigs)
file(database)
tuple val(meta), val(confidence_set_name), path(fasta)
path(database)

output:
tuple val(meta), val(confidence_set_name), path("${confidence_set_name}_quality_summary.tsv"), path("${confidence_set_name}/")
tuple val(meta), val(confidence_set_name), path("${confidence_set_name}_quality_summary.tsv")

script:

Expand Down
4 changes: 2 additions & 2 deletions modules/local/chromomap/main.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process GENERATE_CHROMOMAP_TABLE {
label 'process_single'
tag "${meta.id}"
tag "${meta.id} ${set_name}"
container 'quay.io/microbiome-informatics/bioruby:2.0.1'

input:
Expand Down Expand Up @@ -30,7 +30,7 @@ process GENERATE_CHROMOMAP_TABLE {

process CHROMOMAP {
label 'process_low'
tag "${meta.id}"
tag "${meta.id} ${set_name}"
container 'quay.io/microbiome-informatics/r_chromomap:0.3'

input:
Expand Down
2 changes: 1 addition & 1 deletion modules/local/help.nf
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def helpMSG() {
${c_yellow}HPC computing:${c_reset}
Especially for execution of the workflow on a HPC (LSF, SLURM) adjust the following parameters if needed:
--databases defines the path where databases are stored [default: $params.dbs]
--databases defines the path where databases are stored [default: $params.databases]
--workdir defines the path where nextflow writes tmp files [default: $params.workdir]
--singularity_cachedir defines the path where images (singularity) are cached [default: $params.singularity_cachedir]
Expand Down
12 changes: 6 additions & 6 deletions modules/local/hmmscan/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,20 @@ process HMMSCAN {
path(db)

output:
tuple val(meta), val(set_name), path("${set_name}_${params.dbs}_hmmscan.tbl"), path(faa)
tuple val(meta), val(set_name), path("${set_name}_${params.databases}_hmmscan.tbl"), path(faa)

script:
"""
if [[ ${params.dbs} == "viphogs" ]]; then
if [[ ${params.databases} == "viphogs" ]]; then
if [[ ${params.version} == "v1" ]]; then
hmmscan --cpu ${task.cpus} --noali -E "0.001" --domtblout ${set_name}_${params.dbs}_hmmscan.tbl ${db}/${db}.hmm ${faa}
hmmscan --cpu ${task.cpus} --noali -E "0.001" --domtblout ${set_name}_${params.databases}_hmmscan.tbl ${db}/${db}.hmm ${faa}
else
hmmscan --cpu ${task.cpus} --noali --cut_ga --domtblout ${set_name}_${params.dbs}_hmmscan_cutga.tbl ${db}/${db}.hmm ${faa}
hmmscan --cpu ${task.cpus} --noali --cut_ga --domtblout ${set_name}_${params.databases}_hmmscan_cutga.tbl ${db}/${db}.hmm ${faa}
# filter by e-value for models that do not have any GA cutoff
awk '{if(\$1 ~ /^#/){print \$0}else{if(\$7<0.001){print \$0}}}' ${set_name}_${params.dbs}_hmmscan_cutga.tbl > ${set_name}_${params.db}_hmmscan.tbl
awk '{if(\$1 ~ /^#/){print \$0}else{if(\$7<0.001){print \$0}}}' ${set_name}_${params.databases}_hmmscan_cutga.tbl > ${set_name}_${params.db}_hmmscan.tbl
fi
else
hmmscan --cpu ${task.cpus} --noali -E "0.001" --domtblout ${set_name}_${params.dbs}_hmmscan.tbl ${db}/${db}.hmm ${faa}
hmmscan --cpu ${task.cpus} --noali -E "0.001" --domtblout ${set_name}_${params.databases}_hmmscan.tbl ${db}/${db}.hmm ${faa}
fi
"""
}
4 changes: 2 additions & 2 deletions modules/local/sankey/main.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process GENERATE_SANKEY_TABLE {
label 'process_low'
tag "${meta.id}"
tag "${meta.id} ${set_name}"
container 'quay.io/microbiome-informatics/bioruby:2.0.1'

input:
Expand All @@ -23,7 +23,7 @@ process GENERATE_SANKEY_TABLE {
process SANKEY {

label 'process_low'
tag "${meta.id}"
tag "${meta.id} ${set_name}"
container 'quay.io/microbiome-informatics/sankeyd3:0.12.3'

input:
Expand Down
6 changes: 1 addition & 5 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ params {
finaldir = '08-final'

// location for auto-downloaded data such as databases
dbs = 'nextflow-autodownload-databases'
databases = 'nextflow-autodownload-databases'

// optional profile configurations, mostly necessary for HPC execution [lsf, slurm]
workdir = 'work'
Expand Down Expand Up @@ -96,14 +96,12 @@ profiles {
cpus = params.max_cores
}
workDir = params.workdir
params.databases = params.dbs
params.cloudProcess = false
includeConfig 'configs/local.config'
}

lsf {
workDir = params.workdir
params.databases = params.dbs
executor {
name = "lsf"
queueSize = 200
Expand All @@ -114,7 +112,6 @@ profiles {

slurm {
workDir = params.workdir
params.databases = params.dbs
executor {
name = "slurm"
queueSize = 200
Expand Down Expand Up @@ -150,7 +147,6 @@ profiles {
cpus = params.max_cores
}
workDir = params.workdir
params.databases = params.dbs
params.cloudProcess = false
includeConfig 'configs/local.config'
docker { enabled = true }
Expand Down
19 changes: 7 additions & 12 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -148,19 +148,19 @@
"description": "Input parameters",
"properties": {
"virome": {
"type": "string",
"type": "boolean",
"description": "VirSorter parameter, set when running a data set mostly composed of viruses"
},
"chromomap": {
"type": "string",
"type": "boolean",
"description": "feature to activate chromomap plot"
},
"balloon": {
"type": "string",
"type": "boolean",
"description": "feature to activate balloon plot"
},
"onlyannotate": {
"type": "string",
"type": "boolean",
"description": "Only annotate the input FASTA (no virus prediction, only contig length filtering)"
},
"mashmap_len": {
Expand All @@ -169,7 +169,7 @@
"description": "Mashmap mapping segment length, shorter sequences will be ignored"
},
"mashmap": {
"type": "string",
"type": "boolean",
"description": "Map the viral contigs against the provided reference"
},
"evalue": {
Expand All @@ -189,7 +189,7 @@
},
"factor": {
"type": "string",
"default": "/Users/kates/Desktop/EBI/MGnify/pipelines/emg-viral-pipeline/references/viphogs_cds_per_taxon_cummulative.csv",
"default": "emg-viral-pipeline/references/viphogs_cds_per_taxon_cummulative.csv",
"description": "Path to file with viral assemblies metadata, including taxon-specific factors"
},
"sankey": {
Expand Down Expand Up @@ -292,7 +292,7 @@
"fa_icon": "fas fa-dna",
"description": "Nextflow arguments",
"properties": {
"dbs": {
"databases": {
"type": "string",
"default": "nextflow-autodownload-databases",
"description": "directory path to databases"
Expand All @@ -307,11 +307,6 @@
"default": "singularity",
"description": "singularity folder"
},
"databases": {
"type": "string",
"default": "nextflow-autodownload-databases",
"description": "directory path to databases"
},
"cloudProcess": {
"type": "boolean",
"description": "run on cloud"
Expand Down
26 changes: 6 additions & 20 deletions subworkflows/local/annotate.nf
Original file line number Diff line number Diff line change
Expand Up @@ -93,35 +93,21 @@ workflow ANNOTATE {
}

CHECKV(
predicted_contigs.join(contigs.map { meta, fasta -> fasta }),
checkv_db
predicted_contigs,
checkv_db.first()
)

viphos_annotations = ANNOTATION.out.annotations.groupTuple().map{
meta, values -> {
def annotations = values.collect{it[1]};
return [meta, annotations] }
}
taxonomy_annotations = ASSIGN.out.groupTuple().map{
meta, values -> {
def taxonomy = values.collect{it[1]};
return [meta, taxonomy] }
}
checkv_results = CHECKV.out.groupTuple().map{
meta, values -> {
def quality_summary = values.collect{it[1]};
return [meta, quality_summary] }
}

viphos_annotations = ANNOTATION.out.annotations.map{meta, type, annotation -> [meta, annotation]}.groupTuple()
taxonomy_annotations = ASSIGN.out.map{meta, type, annotation -> [meta, annotation]}.groupTuple()
checkv_results = CHECKV.out.map{meta, type, quality -> [meta, quality]}.groupTuple()

WRITE_GFF(
contigs.join(viphos_annotations).join(taxonomy_annotations).join(checkv_results)
)

chromomap_ch = Channel.empty()
predicted_contigs_filtered = predicted_contigs.map { meta, set_name, fasta -> [set_name, meta, fasta] }
plot_contig_map_filtered = PLOT_CONTIG_MAP.out.map { meta, set_name, dir, table -> [set_name, table] }
chromomap_ch = predicted_contigs_filtered.join(plot_contig_map_filtered).map { set_name, assembly_name, fasta, tsv -> [assembly_name, set_name, fasta, tsv]}
chromomap_ch.view()

emit:
assign_output = ASSIGN.out
Expand Down
7 changes: 4 additions & 3 deletions workflows/virify.nf
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ if ( params.samplesheet ) {
samplesheet = Channel.fromList(samplesheetToList(params.samplesheet, "./assets/schema_input.json"))
input_ch = samplesheet.map(groupReads)
}
input_ch.view()

// one sample of assembly
if (params.fasta) {
input_ch = Channel.fromPath( params.fasta, checkIfExists: true)
Expand Down Expand Up @@ -113,11 +113,12 @@ workflow VIRIFY {
DOWNLOAD_DATABASES.out.virfinder_db,
DOWNLOAD_DATABASES.out.pprmeta_git
)
postprocess_input_ch = DETECT.out
// (meta, fasta, map)
postprocess_input_ch = DETECT.out.detect_output
}

// ----------- POSTPROCESS: restore fasta file
POSTPROCESS(postprocess_input_ch)
POSTPROCESS(postprocess_input_ch) // out: (meta, type(HC/LC/PP), fasta)

// ----------- ANNOTATE
ANNOTATE(
Expand Down

0 comments on commit cd0039a

Please sign in to comment.