From 13ba7e1d8ef4a947bbedba2a7f157e55348c3cd1 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 26 Jan 2024 21:48:25 +0100 Subject: [PATCH 01/15] install createpon --- modules.json | 5 ++ .../environment.yml | 7 +++ .../createreadcountpanelofnormals/main.nf | 55 +++++++++++++++++++ .../createreadcountpanelofnormals/meta.yml | 45 +++++++++++++++ 4 files changed, 112 insertions(+) create mode 100644 modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml create mode 100644 modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf create mode 100644 modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml diff --git a/modules.json b/modules.json index 8abdadf..f37f8a0 100644 --- a/modules.json +++ b/modules.json @@ -25,6 +25,11 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, + "gatk4/createreadcountpanelofnormals": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, "gatk4/determinegermlinecontigploidy": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml b/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml new file mode 100644 index 0000000..ea5b9bf --- /dev/null +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_createreadcountpanelofnormals +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf b/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf new file mode 100644 index 0000000..9d32a99 --- /dev/null +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/main.nf @@ -0,0 +1,55 @@ +process GATK4_CREATEREADCOUNTPANELOFNORMALS { + tag "$meta.id" + label 'process_single' + + conda 
"${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(counts) + + output: + tuple val(meta), path("*.hdf5"), emit: pon + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_list = counts.collect(){"--input $it"}.join(" ") + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK CreateReadCountPanelOfNormals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CreateReadCountPanelOfNormals \\ + ${args} \\ + ${input_list} \\ + --output ${prefix}.hdf5 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.hdf5 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml b/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml new file mode 100644 index 0000000..ba01f63 --- /dev/null +++ b/modules/nf-core/gatk4/createreadcountpanelofnormals/meta.yml @@ -0,0 +1,45 @@ +name: "gatk4_createreadcountpanelofnormals" +description: Creates a panel of normals (PoN) for read-count denoising given the read counts for samples in the panel. 
+keywords: + - createreadcountpanelofnormals + - gatk4 + - panelofnormals +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + tool_dev_url: "https://github.com/broadinstitute/gatk" + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - counts: + type: file + description: Read counts in hdf5 or tsv format. + pattern: "*.{hdf5,tsv}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - pon: + type: file + description: Panel-of-normals file. 
+ pattern: "*.{hdf5}" +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" From 09999ba89eff2d7c8b2e0a3add0f759df4b79efa Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 27 Jan 2024 00:23:30 +0100 Subject: [PATCH 02/15] add gens --- conf/modules/gens_pon.config | 65 +++++++++++++ conf/modules/germlinecnvcaller_cohort.config | 6 +- conf/test.config | 4 +- docs/usage.md | 8 +- nextflow.config | 18 ++-- nextflow_schema.json | 41 ++++++-- subworkflows/local/gens_pon.nf | 94 +++++++++++++++++++ .../pipeline/germlinecnvcaller_cohort.nf.test | 2 +- workflows/createpanelrefs.nf | 31 ++++-- 9 files changed, 234 insertions(+), 35 deletions(-) create mode 100644 conf/modules/gens_pon.config create mode 100644 subworkflows/local/gens_pon.nf diff --git a/conf/modules/gens_pon.config b/conf/modules/gens_pon.config new file mode 100644 index 0000000..dea78a2 --- /dev/null +++ b/conf/modules/gens_pon.config @@ -0,0 +1,65 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+---------------------------------------------------------------------------------------- +*/ + +process { + + withName: '.*GENS_PON.*' { + publishDir = [ + enabled: false + ] + } + + withName: '.*GENS_PON:SAMTOOLS_FAIDX' { + ext.when = { params.fai.equals(null) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/gens_pon/references" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*GENS_PON:PICARD_CREATESEQUENCEDICTIONARY' { + ext.when = { params.dict.equals(null) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/gens_pon/references" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*GENS_PON:GATK4_PREPROCESSINTERVALS' { + ext.args = { ["--imr OVERLAPPING_ONLY", + "--bin-length ${params.gens_bin_length}"].join(" ") + } + } + + withName: '.*GENS_PON:GATK4_COLLECTREADCOUNTS' { + ext.args = {"--format ${params.readcount_format} --imr OVERLAPPING_ONLY"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/gens_pon/readcounts" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*GENS_PON:GATK4_CREATEREADCOUNTPANELOFNORMALS' { + ext.args = { ["--minimum-interval-median-percentile 10.0", + "--maximum-chunk-size 29349635"].join(" ")} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/gens_pon/createreadcountpanelofnormals" }, + pattern: "*-model", + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + +} diff --git a/conf/modules/germlinecnvcaller_cohort.config b/conf/modules/germlinecnvcaller_cohort.config index b7204f5..9e26e61 100644 --- a/conf/modules/germlinecnvcaller_cohort.config +++ b/conf/modules/germlinecnvcaller_cohort.config @@ -38,8 +38,8 @@ process { withName: '.*GERMLINECNVCALLER_COHORT:GATK4_PREPROCESSINTERVALS' { ext.args = { ["--imr OVERLAPPING_ONLY", - "--padding ${params.padding}", - "--bin-length ${params.bin_length}"].join(" ") + "--padding ${params.gcnv_padding}", + "--bin-length ${params.gcnv_bin_length}"].join(" ") } } @@ -63,7 +63,7 @@ process { } withName: '.*GERMLINECNVCALLER_COHORT:GATK4_INTERVALLISTTOOLS' { - ext.args = {"--SUBDIVISION_MODE INTERVAL_COUNT --SCATTER_CONTENT ${params.scatter_content}"} + ext.args = {"--SUBDIVISION_MODE INTERVAL_COUNT --SCATTER_CONTENT ${params.gcnv_scatter_content}"} } withName: '.*GERMLINECNVCALLER_COHORT:GATK4_DETERMINEGERMLINECONTIGPLOIDY' { diff --git a/conf/test.config b/conf/test.config index 41e6bed..550e42d 100644 --- a/conf/test.config +++ b/conf/test.config @@ -26,8 +26,8 @@ params { tools = 'cnvkit' //Germlinecnvcaller options - scatter_content = 2 - ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" + gcnv_scatter_content = 2 + gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" // Small reference genome genome = null diff --git a/docs/usage.md b/docs/usage.md index c2ac6a6..4f9d2a4 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -111,10 +111,10 @@ If you wish to share such profile (such as upload as supplementary material for If you are running the pipeline to generate references for the GATK's germlinecnvcalling workflow, you should ensure that you have provided all the mandatory options specified in the table below. 
-| Mandatory | Optional | -| ------------------------- | -------- | -| fasta/genomes | fai | -| ploidy_priors<sup>1</sup> | dict | +| Mandatory | Optional | +| ------------------------------ | -------- | +| fasta/genomes | fai | +| gcnv_ploidy_priors<sup>1</sup> | dict | <sup>1</sup> To learn more about this file, see [this comment](https://gatk.broadinstitute.org/hc/en-us/community/posts/360074399831/comments/13441240230299) on GATK forum.
diff --git a/nextflow.config b/nextflow.config index b5fe870..e355fd0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,13 +20,16 @@ params { tools = null // No default, must be specified // Germlinecnvcaller options - bin_length = 1000 - mappable_regions = null - padding = 0 - ploidy_priors = null - readcount_format = 'HDF5' - scatter_content = 5000 - segmental_duplications = null + gcnv_bin_length = 1000 + gcnv_mappable_regions = null + gcnv_padding = 0 + gcnv_ploidy_priors = null + gcnv_readcount_format = 'HDF5' + gcnv_scatter_content = 5000 + gcnv_segmental_duplications = null + + // Germlinecnvcaller options + gens_bin_length = 100 // CNVkit options cnvkit_targets = null @@ -253,6 +256,7 @@ manifest { includeConfig 'conf/modules/base.config' includeConfig 'conf/modules/cnvkit.config' includeConfig 'conf/modules/germlinecnvcaller_cohort.config' +includeConfig 'conf/modules/gens_pon.config' // Function to ensure that resource requirements don't go beyond // a maximum limit diff --git a/nextflow_schema.json b/nextflow_schema.json index 5a0a915..613f33f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -11,14 +11,14 @@ "description": "Options used by the germlinecnvcaller subworkflow", "default": "", "properties": { - "bin_length": { + "gcnv_bin_length": { "type": "number", "default": 1000, "description": "Length (in bp) of the bins. If zero, no binning will be performed.", "fa_icon": "fas fa-sort-numeric-down", "help_text": "Used by GATK's PreprocessIntervals. GATK recommends a bin length of 1000 for WGS analysis, and 0 for WES analysis. " }, - "mappable_regions": { + "gcnv_mappable_regions": { "type": "string", "exists": true, "description": "Path to Umap single-read mappability track in .bed or .bed.gz format. Overlapping intervals must be merged.", @@ -26,14 +26,14 @@ "fa_icon": "fas fa-file", "help_text": "Used by GATK's AnnotateIntervals." 
}, - "padding": { + "gcnv_padding": { "type": "number", "description": "Length (in bp) of the padding regions on each side of the intervals.", "default": 0, "fa_icon": "fas fa-sort-numeric-down", "help_text": "Used by GATK's PreprocessIntervals. GATK recommends a padding of 0 for WGS analysis, and 250 for WES analysis." }, - "ploidy_priors": { + "gcnv_ploidy_priors": { "type": "string", "exists": true, "format": "file-path", @@ -42,21 +42,21 @@ "fa_icon": "fas fa-file", "help_text": "Used by GATK's DeterminGermlineContigPloidy." }, - "readcount_format": { + "gcnv_readcount_format": { "type": "string", "description": "Output file format for count data", "default": "HDF5", "fa_icon": "fas fa-align-left", "enum": ["HDF5", "TSV"] }, - "scatter_content": { + "gcnv_scatter_content": { "type": "number", "description": "When scattering with this argument, each of the resultant files will (ideally) have this amount of interval-counts.", "default": 5000, "fa_icon": "fas fa-sort-numeric-down", "help_text": "Used by GATK/Picards's IntervalListTools." }, - "segmental_duplications": { + "gcnv_segmental_duplications": { "type": "string", "exists": true, "description": "Path to segmental-duplication track in .bed or .bed.gz format. Overlapping intervals must be merged.", @@ -66,6 +66,28 @@ } } }, + "gens_options": { + "title": "GENS options", + "type": "object", + "description": "Options used by the gens subworkflow", + "default": "", + "properties": { + "gens_bin_length": { + "type": "number", + "default": 100, + "description": "Length (in bp) of the bins. If zero, no binning will be performed.", + "fa_icon": "fas fa-sort-numeric-down", + "help_text": "Used by GATK's PreprocessIntervals. We recommend a bin length of 100." 
+ }, + "gens_readcount_format": { + "type": "string", + "description": "Output file format for count data", + "default": "HDF5", + "fa_icon": "fas fa-align-left", + "enum": ["HDF5", "TSV"] + } + } + }, "cnvkit_options": { "title": "CNVkit options", "type": "object", @@ -134,7 +156,7 @@ "fa_icon": "fas fa-toolbox", "description": "Tools to use for building Panel of Normals or models.", "help_text": "Multiple tools separated with commas.\n\nTools available: CNVKIT,germlinecnvcaller", - "pattern": "^((cnvkit|germlinecnvcaller)?,?)*(? + alignment_with_index: index.size() > 0 + return [meta, alignment, index] + alignment_without_index: index.size() == 0 + return [meta, alignment] + } + .set { ch_for_mix } + + SAMTOOLS_INDEX (ch_for_mix.alignment_without_index) + + SAMTOOLS_INDEX.out.bai + .mix(SAMTOOLS_INDEX.out.crai) + .set { ch_index } + + // + // Collect alignment files and their indices + // + ch_for_mix.alignment_without_index + .join(ch_index) + .mix(ch_for_mix.alignment_with_index) + .combine(GATK4_PREPROCESSINTERVALS.out.interval_list.map{it -> it[1]}) + .set {ch_readcounts_in} + + // + // Collect read counts, and generate models + // + GATK4_COLLECTREADCOUNTS (ch_readcounts_in, + ch_fasta, + ch_fai, + ch_dict) + + GATK4_COLLECTREADCOUNTS.out.tsv + .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) + .set { ch_readcounts_out } + + + GATK4_CREATEREADCOUNTPANELOFNORMALS (ch_readcounts_out) + + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + ch_versions = ch_versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + ch_versions = ch_versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) + ch_versions = ch_versions.mix(GATK4_COLLECTREADCOUNTS.out.versions.first()) + ch_versions = ch_versions.mix(GATK4_CREATEREADCOUNTPANELOFNORMALS.out.versions.first()) + + emit: + genspon = GATK4_CREATEREADCOUNTPANELOFNORMALS.out.pon + readcounts = ch_readcounts_out + versions = ch_versions +} diff --git 
a/tests/pipeline/germlinecnvcaller_cohort.nf.test b/tests/pipeline/germlinecnvcaller_cohort.nf.test index febc7da..2741b5f 100644 --- a/tests/pipeline/germlinecnvcaller_cohort.nf.test +++ b/tests/pipeline/germlinecnvcaller_cohort.nf.test @@ -11,7 +11,7 @@ nextflow_workflow { params { outdir = "$outputDir" tools = 'germlinecnvcaller' - scatter_content = 2 + gcnv_scatter_content = 2 fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" } diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index 6e768c8..c22874d 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -43,16 +43,16 @@ ch_input = ch_from_samplesheet.map{meta, bam, bai, cram, crai -> } // Initialize file channels based on params, defined in the params.genomes[params.genome] scope -ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() - : Channel.empty() -ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() - : Channel.empty() -ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() - : Channel.empty() -ch_ploidy_priors = params.ploidy_priors ? Channel.fromPath(params.ploidy_priors).collect() - : Channel.empty() -ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName],targets]}.collect() - : Channel.value([[:],[]]) +ch_dict = params.dict ? Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() + : Channel.empty() +ch_fai = params.fai ? Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() + : Channel.empty() +ch_fasta = params.fasta ? 
Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() + : Channel.empty() +ch_ploidy_priors = params.gcnv_ploidy_priors ? Channel.fromPath(params.gcnv_ploidy_priors).collect() + : Channel.empty() +ch_cnvkit_targets = params.cnvkit_targets ? Channel.fromPath(params.cnvkit_targets).map { targets -> [[id:targets.baseName],targets]}.collect() + : Channel.value([[:],[]]) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -75,6 +75,7 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // +include { GENS_PON } from '../subworkflows/local/gens_pon' include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvcaller_cohort' /* @@ -134,6 +135,16 @@ workflow CREATEPANELREFS { ch_versions = ch_versions.mix(GERMLINECNVCALLER_COHORT.out.versions) } + if (params.tools && params.tools.split(',').contains('gens')) { + + GENS_PON(ch_dict, + ch_fai, + ch_fasta, + ch_input) + + ch_versions = ch_versions.mix(GENS_PON.out.versions) + } + CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) From fe0ab74e9f8247792474567888783d11138f9b6a Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 29 Jan 2024 22:38:10 +0100 Subject: [PATCH 03/15] update indent and comment --- conf/test.config | 2 +- nextflow.config | 2 +- subworkflows/local/gens_pon.nf | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/conf/test.config b/conf/test.config index 550e42d..00bbe3b 100644 --- a/conf/test.config +++ b/conf/test.config @@ -27,7 +27,7 @@ params { //Germlinecnvcaller options gcnv_scatter_content = 2 - gcnv_ploidy_priors = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" + gcnv_ploidy_priors = 
"https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" // Small reference genome genome = null diff --git a/nextflow.config b/nextflow.config index e355fd0..d51a0aa 100644 --- a/nextflow.config +++ b/nextflow.config @@ -28,7 +28,7 @@ params { gcnv_scatter_content = 5000 gcnv_segmental_duplications = null - // Germlinecnvcaller options + // Gens options gens_bin_length = 100 // CNVkit options diff --git a/subworkflows/local/gens_pon.nf b/subworkflows/local/gens_pon.nf index 8dc2ec2..1bf8e57 100644 --- a/subworkflows/local/gens_pon.nf +++ b/subworkflows/local/gens_pon.nf @@ -1,9 +1,9 @@ include { GATK4_COLLECTREADCOUNTS } from '../../modules/nf-core/gatk4/collectreadcounts/main' +include { GATK4_CREATEREADCOUNTPANELOFNORMALS } from '../../modules/nf-core/gatk4/createreadcountpanelofnormals/main' include { GATK4_PREPROCESSINTERVALS } from '../../modules/nf-core/gatk4/preprocessintervals/main' include { PICARD_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/picard/createsequencedictionary/main' include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' -include { GATK4_CREATEREADCOUNTPANELOFNORMALS } from '../../modules/nf-core/gatk4/createreadcountpanelofnormals/main' workflow GENS_PON { take: @@ -80,12 +80,12 @@ workflow GENS_PON { GATK4_CREATEREADCOUNTPANELOFNORMALS (ch_readcounts_out) - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) - ch_versions = ch_versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) ch_versions = ch_versions.mix(GATK4_COLLECTREADCOUNTS.out.versions.first()) ch_versions = ch_versions.mix(GATK4_CREATEREADCOUNTPANELOFNORMALS.out.versions.first()) + ch_versions = 
ch_versions.mix(GATK4_PREPROCESSINTERVALS.out.versions) + ch_versions = ch_versions.mix(PICARD_CREATESEQUENCEDICTIONARY.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) emit: genspon = GATK4_CREATEREADCOUNTPANELOFNORMALS.out.pon From 64e2ee48a77ca9a984bc70e6bf31a0104777edb9 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 28 Mar 2024 16:35:40 +0100 Subject: [PATCH 04/15] fix lint errors --- subworkflows/local/gens_pon.nf | 50 ++++++++++++++-------------------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/subworkflows/local/gens_pon.nf b/subworkflows/local/gens_pon.nf index 1bf8e57..326e6be 100644 --- a/subworkflows/local/gens_pon.nf +++ b/subworkflows/local/gens_pon.nf @@ -18,9 +18,9 @@ workflow GENS_PON { // // Prepare references // - SAMTOOLS_FAIDX (ch_fasta, [[:],[]]) + SAMTOOLS_FAIDX ( ch_fasta, [[:],[]] ) - PICARD_CREATESEQUENCEDICTIONARY (ch_fasta) + PICARD_CREATESEQUENCEDICTIONARY ( ch_fasta ) ch_user_dict .mix(PICARD_CREATESEQUENCEDICTIONARY.out.reference_dict) @@ -32,53 +32,45 @@ workflow GENS_PON { .collect() .set { ch_fai } - GATK4_PREPROCESSINTERVALS (ch_fasta, - ch_fai, - ch_dict, - [[:],[]], [[:],[]]) - + GATK4_PREPROCESSINTERVALS ( ch_fasta, ch_fai, ch_dict, [[:],[]], [[:],[]] ) // // Filter out files that lack indices, and generate them // ch_input - .branch { meta, alignment, index -> - alignment_with_index: index.size() > 0 - return [meta, alignment, index] - alignment_without_index: index.size() == 0 - return [meta, alignment] - } - .set { ch_for_mix } + .branch { meta, alignment, index -> + alignment_with_index: index.size() > 0 + return [meta, alignment, index] + alignment_without_index: index.size() == 0 + return [meta, alignment] + } + .set { ch_for_mix } - SAMTOOLS_INDEX (ch_for_mix.alignment_without_index) + SAMTOOLS_INDEX ( ch_for_mix.alignment_without_index ) 
SAMTOOLS_INDEX.out.bai - .mix(SAMTOOLS_INDEX.out.crai) - .set { ch_index } + .mix(SAMTOOLS_INDEX.out.crai) + .set { ch_index } // // Collect alignment files and their indices // ch_for_mix.alignment_without_index - .join(ch_index) - .mix(ch_for_mix.alignment_with_index) - .combine(GATK4_PREPROCESSINTERVALS.out.interval_list.map{it -> it[1]}) - .set {ch_readcounts_in} + .join(ch_index) + .mix(ch_for_mix.alignment_with_index) + .combine(GATK4_PREPROCESSINTERVALS.out.interval_list.map{it -> it[1]}) + .set {ch_readcounts_in} // // Collect read counts, and generate models // - GATK4_COLLECTREADCOUNTS (ch_readcounts_in, - ch_fasta, - ch_fai, - ch_dict) + GATK4_COLLECTREADCOUNTS ( ch_readcounts_in, ch_fasta, ch_fai, ch_dict ) GATK4_COLLECTREADCOUNTS.out.tsv - .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) - .set { ch_readcounts_out } - + .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) + .set { ch_readcounts_out } - GATK4_CREATEREADCOUNTPANELOFNORMALS (ch_readcounts_out) + GATK4_CREATEREADCOUNTPANELOFNORMALS ( ch_readcounts_out ) ch_versions = ch_versions.mix(GATK4_COLLECTREADCOUNTS.out.versions.first()) ch_versions = ch_versions.mix(GATK4_CREATEREADCOUNTPANELOFNORMALS.out.versions.first()) From c93b543d6ee31e82ee274f3a148031f69be2bbfc Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 28 Mar 2024 16:48:33 +0100 Subject: [PATCH 05/15] fix lint --- main.nf | 17 ++++++++++------- nextflow.config | 2 -- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/main.nf b/main.nf index 71b8149..2dfd69b 100644 --- a/main.nf +++ b/main.nf @@ -30,13 +30,16 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_crea // This is an example of how to use getGenomeAttribute() to fetch parameters // from igenomes.config using `--genome` -params.fasta = getGenomeAttribute('fasta') -params.fai = getGenomeAttribute('fai') -params.dict = getGenomeAttribute('dict') -params.target_bed = getGenomeAttribute('target_bed') 
-params.target_interval_list = getGenomeAttribute('target_interval_list') -params.exclude_bed = getGenomeAttribute('exclude_bed') -params.exclude_interval_list = getGenomeAttribute('exclude_interval_list') +params.fasta = getGenomeAttribute('fasta') +params.fai = getGenomeAttribute('fai') +params.dict = getGenomeAttribute('dict') +params.gcnv_exclude_bed = getGenomeAttribute('gcnv_exclude_bed') +params.gcnv_exclude_interval_list = getGenomeAttribute('gcnv_exclude_interval_list') +params.gcnv_mappable_regions = getGenomeAttribute('gcnv_mappable_regions') +params.gcnv_target_bed = getGenomeAttribute('gcnv_target_bed') +params.gcnv_target_interval_list = getGenomeAttribute('gcnv_target_interval_list') +params.gcnv_ploidy_priors = getGenomeAttribute('gcnv_ploidy_priors') +params.gcnv_segmental_duplications = getGenomeAttribute('gcnv_segmental_duplications') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/nextflow.config b/nextflow.config index f685b59..7f0a4d1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -25,9 +25,7 @@ params { // Germlinecnvcaller options gcnv_analysis_type = 'wgs' gcnv_bin_length = 1000 - gcnv_mappable_regions = null gcnv_padding = 0 - gcnv_ploidy_priors = null gcnv_readcount_format = 'HDF5' gcnv_scatter_content = 5000 gcnv_segmental_duplications = null From 6b9501eb4692559546cf010cd80f92f50b917567 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 28 Mar 2024 17:45:42 +0100 Subject: [PATCH 06/15] add tests --- conf/modules/gens_pon.config | 1 - subworkflows/local/gens_pon.nf | 4 ++ tests/config/tags.yml | 7 +++ tests/pipeline/gens_pon.nf.test | 49 +++++++++++++++++++ tests/test_assets/gens_software_versions.yaml | 12 +++++ 5 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 tests/pipeline/gens_pon.nf.test create mode 100644 tests/test_assets/gens_software_versions.yaml diff --git a/conf/modules/gens_pon.config 
b/conf/modules/gens_pon.config index dea78a2..ee473b1 100644 --- a/conf/modules/gens_pon.config +++ b/conf/modules/gens_pon.config @@ -57,7 +57,6 @@ process { publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/gens_pon/createreadcountpanelofnormals" }, - pattern: "*-model", saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } diff --git a/subworkflows/local/gens_pon.nf b/subworkflows/local/gens_pon.nf index 326e6be..38c9727 100644 --- a/subworkflows/local/gens_pon.nf +++ b/subworkflows/local/gens_pon.nf @@ -68,6 +68,10 @@ workflow GENS_PON { GATK4_COLLECTREADCOUNTS.out.tsv .mix(GATK4_COLLECTREADCOUNTS.out.hdf5) + .collect { it[1] } + .map { it -> + return [[id:"gens_pon"], it] + } .set { ch_readcounts_out } GATK4_CREATEREADCOUNTPANELOFNORMALS ( ch_readcounts_out ) diff --git a/tests/config/tags.yml b/tests/config/tags.yml index dd415e2..3672b33 100644 --- a/tests/config/tags.yml +++ b/tests/config/tags.yml @@ -18,3 +18,10 @@ germlinecnvcaller_cohort: - subworkflows/local/germlinecnvcaller_cohort.nf - tests/pipeline/germlinecnvcaller_cohort.nf.test - tests/pipeline/germlinecnvcaller_cohort.nf.config + - tests/test_assets/germlinecnvcaller_software_versions.yaml + +gens_pon: + - conf/modules/gens_pon.config + - subworkflows/local/gens_pon.nf + - tests/pipeline/gens_pon.nf.test + - tests/test_assets/gens_software_versions.yaml diff --git a/tests/pipeline/gens_pon.nf.test b/tests/pipeline/gens_pon.nf.test new file mode 100644 index 0000000..fec08b2 --- /dev/null +++ b/tests/pipeline/gens_pon.nf.test @@ -0,0 +1,49 @@ +nextflow_workflow { + + name "Test Workflow GENS_PON" + script "subworkflows/local/gens_pon.nf" + workflow "GENS_PON" + tag "gens" + + test("Run gens test") { + + when { + params { + outdir = "$outputDir" + tools = 'gens' + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" + } + workflow { + """ + input[0] = Channel.empty() + input[1] = 
Channel.empty() + input[2] = Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() + input[3] = Channel.of( + [[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"),[]], + [[ id:'test2' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam"),[]]) + """ + } + } + + then { + assert workflow.success + assert workflow.trace.succeeded().size() == 8 + assert workflow.trace.failed().size() == 0 + + assert path("$outputDir/gens_pon/references/genome.dict").toFile().isFile() + assert path("$outputDir/gens_pon/references/genome.fasta.fai").toFile().isFile() + assert path("$outputDir/gens_pon/createreadcountpanelofnormals/gens_pon.hdf5").toFile().isFile() + + def expected = path("$baseDir/tests/test_assets/gens_software_versions.yaml").yaml.collect() + def observed_list = [] + def observed = workflow.out.versions.collect {f -> path(f).yaml.entrySet()} + observed.stream() + .forEach(observed_list::addAll) + + assertContainsInAnyOrder(expected, observed_list) + + } + + } + +} diff --git a/tests/test_assets/gens_software_versions.yaml b/tests/test_assets/gens_software_versions.yaml new file mode 100644 index 0000000..9fec793 --- /dev/null +++ b/tests/test_assets/gens_software_versions.yaml @@ -0,0 +1,12 @@ +"GENS_PON:SAMTOOLS_INDEX": + samtools: 1.18 +"GENS_PON:GATK4_COLLECTREADCOUNTS": + gatk4: 4.4.0.0 +"GENS_PON:SAMTOOLS_FAIDX": + samtools: 1.18 +"GENS_PON:GATK4_PREPROCESSINTERVALS": + gatk4: 4.4.0.0 +"GENS_PON:PICARD_CREATESEQUENCEDICTIONARY": + picard: 3.1.1 +"GENS_PON:GATK4_CREATEREADCOUNTPANELOFNORMALS": + gatk4: 4.4.0.0 From ad579b93ca42216c8ee1b299bc4f879270a78536 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 28 Mar 2024 18:05:38 +0100 Subject: [PATCH 07/15] fix defaults --- 
workflows/createpanelrefs.nf | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index c481dac..b7caa1b 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -15,6 +15,7 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_crea ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +include { GENS_PON } from '../subworkflows/local/gens_pon' include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvcaller_cohort' /* @@ -58,32 +59,6 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.mu ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// - -include { GENS_PON } from '../subworkflows/local/gens_pon' -include { GERMLINECNVCALLER_COHORT } from '../subworkflows/local/germlinecnvcaller_cohort' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Installed directly from nf-core/modules -// - -include { CNVKIT_BATCH } from '../modules/nf-core/cnvkit/batch/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from 
'../modules/nf-core/custom/dumpsoftwareversions/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 9002e514417be4ea0fc2b822ed1b0c95e72f2782 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 28 Mar 2024 18:41:10 +0100 Subject: [PATCH 08/15] test singularity --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9d8031c..f617509 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,7 +60,7 @@ jobs: # Run tests based on changes in code tags: ["${{ fromJson(needs.changes.outputs.tags) }}"] # Only run docker tests on dev branch - profile: ["docker"] + profile: ["docker", "singularity"] # Only test minimal version NXF_VER: ["23.10.0"] # Always run default test From d718b26812da9ad50526e5e28c214e39dacae87f Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 28 Mar 2024 18:47:56 +0100 Subject: [PATCH 09/15] install singularity --- .github/workflows/ci.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f617509..8c8766b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -91,6 +91,12 @@ jobs: - name: Disk space cleanup uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + - name: Set up Singularity + if: matrix.profile == 'singularity' + uses: eWaterCycle/setup-singularity@v5 + with: + singularity-version: 3.7.1 + - name: Run nf-test uses: Wandalen/wretry.action@v1.0.11 with: From cd0fbdc517838e8fd92c241aa0a64a50b9de3908 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Thu, 28 Mar 2024 22:59:11 +0100 Subject: [PATCH 10/15] remove disk cleanup --- .github/workflows/ci.yml | 3 --- 1 file changed, 3 deletions(-) diff --git 
a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8c8766b..bbddca7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -88,9 +88,6 @@ jobs: with: version: "${{ matrix.NXF_VER }}" - - name: Disk space cleanup - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - name: Set up Singularity if: matrix.profile == 'singularity' uses: eWaterCycle/setup-singularity@v5 From 29c198972d1b62bcc921adef891c2488b45780e4 Mon Sep 17 00:00:00 2001 From: ramprasadn <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 29 Mar 2024 22:53:39 +0100 Subject: [PATCH 11/15] add config --- conf/modules/gens_pon.config | 2 +- tests/pipeline/gens_pon.config | 7 +++++++ tests/pipeline/gens_pon.nf.test | 16 +++++++++++----- workflows/createpanelrefs.nf | 13 ++++++++----- 4 files changed, 27 insertions(+), 11 deletions(-) create mode 100644 tests/pipeline/gens_pon.config diff --git a/conf/modules/gens_pon.config b/conf/modules/gens_pon.config index ee473b1..5ea92b5 100644 --- a/conf/modules/gens_pon.config +++ b/conf/modules/gens_pon.config @@ -43,7 +43,7 @@ process { } withName: '.*GENS_PON:GATK4_COLLECTREADCOUNTS' { - ext.args = {"--format ${params.readcount_format} --imr OVERLAPPING_ONLY"} + ext.args = {"--format ${params.gens_readcount_format} --imr OVERLAPPING_ONLY"} publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/gens_pon/readcounts" }, diff --git a/tests/pipeline/gens_pon.config b/tests/pipeline/gens_pon.config new file mode 100644 index 0000000..0a48b59 --- /dev/null +++ b/tests/pipeline/gens_pon.config @@ -0,0 +1,7 @@ +process { + + withName: 'GATK4_CREATEREADCOUNTPANELOFNORMALS' { + ext.args = "--minimum-interval-median-percentile 10 --number-of-eigensamples 2" + } + +} diff --git a/tests/pipeline/gens_pon.nf.test b/tests/pipeline/gens_pon.nf.test index fec08b2..2e06852 100644 --- a/tests/pipeline/gens_pon.nf.test +++ b/tests/pipeline/gens_pon.nf.test @@ -4,19 +4,25 @@ nextflow_workflow { script 
"subworkflows/local/gens_pon.nf" workflow "GENS_PON" tag "gens" + config "tests/pipeline/gens_pon.config" test("Run gens test") { when { params { - outdir = "$outputDir" - tools = 'gens' - fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" + outdir = "$outputDir" + gens_readcount_format = "TSV" + gens_bin_length = 100 + tools = 'gens' + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" + fai = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta.fai" + dict = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.dict" + } workflow { """ - input[0] = Channel.empty() - input[1] = Channel.empty() + input[0] = Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() + input[1] = Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() input[2] = Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() input[3] = Channel.of( [[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"),[]], diff --git a/workflows/createpanelrefs.nf b/workflows/createpanelrefs.nf index b7caa1b..8a76ba9 100644 --- a/workflows/createpanelrefs.nf +++ b/workflows/createpanelrefs.nf @@ -114,10 +114,17 @@ workflow CREATEPANELREFS { if (params.tools && params.tools.split(',').contains('gens')) { + ch_samplesheet + .map{meta, bam, bai, cram, crai -> + if (bam) return [ meta + [data_type:"bam"], bam, bai ] + if (cram) return [ meta + [data_type:"cram"], cram, crai ] + } + .set { ch_gens_input } + GENS_PON(ch_dict, ch_fai, ch_fasta, - ch_input) + ch_gens_input) ch_versions = ch_versions.mix(GENS_PON.out.versions) } @@ -132,12 +139,8 @@ workflow CREATEPANELREFS { // // MODULE: MultiQC // - 
ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) From b69f503cd21753e51c95fc15ac67e1c4169e8a60 Mon Sep 17 00:00:00 2001 From: ramprasadn <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 29 Mar 2024 22:55:32 +0100 Subject: [PATCH 12/15] update ci --- .github/workflows/ci.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bbddca7..9d8031c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,7 +60,7 @@ jobs: # Run tests based on changes in code tags: ["${{ fromJson(needs.changes.outputs.tags) }}"] # Only run docker tests on dev branch - profile: ["docker", "singularity"] + profile: ["docker"] # Only test minimal version NXF_VER: ["23.10.0"] # Always run default test @@ -88,11 +88,8 @@ jobs: with: version: "${{ matrix.NXF_VER }}" - - name: Set up Singularity - if: matrix.profile == 'singularity' - uses: eWaterCycle/setup-singularity@v5 - with: - singularity-version: 3.7.1 + - name: Disk space cleanup + uses: 
jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - name: Run nf-test uses: Wandalen/wretry.action@v1.0.11 From 12d3ab6fa4406c64d4f0ce4d537d04d2ae36ce1e Mon Sep 17 00:00:00 2001 From: ramprasadn <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 30 Mar 2024 20:57:23 +0100 Subject: [PATCH 13/15] update test --- tests/pipeline/gens_pon.config | 14 ++++++++++++++ tests/pipeline/gens_pon.nf.test | 8 +++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tests/pipeline/gens_pon.config b/tests/pipeline/gens_pon.config index 0a48b59..c026504 100644 --- a/tests/pipeline/gens_pon.config +++ b/tests/pipeline/gens_pon.config @@ -5,3 +5,17 @@ process { } } + + +profiles { + docker { + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u root' + } +} diff --git a/tests/pipeline/gens_pon.nf.test b/tests/pipeline/gens_pon.nf.test index 2e06852..b4ff563 100644 --- a/tests/pipeline/gens_pon.nf.test +++ b/tests/pipeline/gens_pon.nf.test @@ -4,7 +4,7 @@ nextflow_workflow { script "subworkflows/local/gens_pon.nf" workflow "GENS_PON" tag "gens" - config "tests/pipeline/gens_pon.config" + config "./gens_pon.config" test("Run gens test") { @@ -15,14 +15,12 @@ nextflow_workflow { gens_bin_length = 100 tools = 'gens' fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" - fai = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta.fai" - dict = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.dict" } workflow { """ - input[0] = Channel.fromPath(params.dict).map { dict -> [[id:dict.baseName],dict]}.collect() - input[1] = Channel.fromPath(params.fai).map { fai -> [[id:fai.baseName],fai]}.collect() + input[0] 
= Channel.empty() + input[1] = Channel.empty() input[2] = Channel.fromPath(params.fasta).map { fasta -> [[id:fasta.baseName],fasta]}.collect() input[3] = Channel.of( [[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam"),[]], From 027ba23d550c72c16f177194c8a6ff5e9bb5e67c Mon Sep 17 00:00:00 2001 From: ramprasadn <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 30 Mar 2024 23:24:14 +0100 Subject: [PATCH 14/15] format --- docs/usage.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 41b687a..aa872d0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -129,10 +129,12 @@ process { If you are running the pipeline to generate references for the gens workflow, you should ensure that you have provided all the mandatory options specified in the table below. -| Mandatory | Optional | -| ------------- | -------- | -| fasta/genomes | fai | -| | dict | +| Mandatory | Optional | +| ------------- | -------------------- | +| fasta/genomes | fai | +| | dict | +| | gens_bin_length | +| | gens_readcount_format| ### germlinecnvcaller From 9674a9f0b326e2a8debc86379932d30f34f5e879 Mon Sep 17 00:00:00 2001 From: ramprasadn <20065894+ramprasadn@users.noreply.github.com> Date: Sat, 30 Mar 2024 23:24:39 +0100 Subject: [PATCH 15/15] typo --- docs/usage.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index aa872d0..e84c726 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -129,12 +129,12 @@ process { If you are running the pipeline to generate references for the gens workflow, you should ensure that you have provided all the mandatory options specified in the table below. 
-| Mandatory | Optional | -| ------------- | -------------------- | -| fasta/genomes | fai | -| | dict | -| | gens_bin_length | -| | gens_readcount_format| +| Mandatory | Optional | +| ------------- | --------------------- | +| fasta/genomes | fai | +| | dict | +| | gens_bin_length | +| | gens_readcount_format | ### germlinecnvcaller