From 5ab10af8bbe8116846c09fadc1c9736c5202bf31 Mon Sep 17 00:00:00 2001 From: Stephen Watts Date: Wed, 24 Apr 2024 17:19:56 +1000 Subject: [PATCH] Refactor reference data parameters --- conf/test.config | 2 +- conf/test_stub.config | 6 +- lib/Utils.groovy | 26 ++--- lib/WorkflowMain.groovy | 100 +++++++++---------- main.nf | 24 ++--- modules/local/linx/somatic/main.nf | 3 - modules/local/linx/somatic/meta.yml | 4 - nextflow.config | 59 +++++++---- nextflow_schema.json | 13 +-- subworkflows/local/linx_annotation/main.nf | 2 - subworkflows/local/prepare_reference/main.nf | 60 +++++------ tests/main.stub.nf.test | 10 +- workflows/targeted.nf | 11 +- workflows/wgts.nf | 13 +-- 14 files changed, 155 insertions(+), 178 deletions(-) diff --git a/conf/test.config b/conf/test.config index d9c81020..473d6a44 100644 --- a/conf/test.config +++ b/conf/test.config @@ -29,5 +29,5 @@ params { mode = 'wgts' genome = 'GRCh38_hmf' - virusbreakenddb_path = 'https://pub-29f2e5b2b7384811bdbbcba44f8b5083.r2.dev/oncoanalyser/test_data/reference_data/virusbreakend/virusbreakenddb_test.tar.gz' + ref_data_virusbreakenddb_path = 'https://pub-29f2e5b2b7384811bdbbcba44f8b5083.r2.dev/oncoanalyser/test_data/reference_data/virusbreakend/virusbreakenddb_test.tar.gz' } diff --git a/conf/test_stub.config b/conf/test_stub.config index a19120f3..9a068d4a 100644 --- a/conf/test_stub.config +++ b/conf/test_stub.config @@ -43,9 +43,9 @@ params { } - virusbreakenddb_path = "temp/virusbreakenddb_20210401/" - hmf_data_path = "temp/hmf_bundle_38/" - panel_data_path = "temp/panel_bundle/tso500_38/" + ref_data_virusbreakenddb_path = "temp/virusbreakenddb_20210401/" + ref_data_hmf_data_path = "temp/hmf_bundle_38/" + ref_data_panel_data_path = "temp/panel_bundle/tso500_38/" // Analysis config mode = 'wgts' diff --git a/lib/Utils.groovy b/lib/Utils.groovy index 7604a344..e8fd6338 100644 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -196,26 +196,26 @@ class Utils { public static void 
createStubPlaceholders(params) { def fps = [ - params.ref_data.genome_fasta, - params.ref_data.genome_fai, - params.ref_data.genome_dict, - params.ref_data.genome_bwa_index, - params.ref_data.genome_bwa_index_image, - params.ref_data.genome_bwa_index_bseq, - params.ref_data.genome_bwa_index_biidx, - params.ref_data.genome_gridss_index, - params.ref_data.virusbreakenddb_path, + params.ref_data_genome_fasta, + params.ref_data_genome_fai, + params.ref_data_genome_dict, + params.ref_data_genome_bwa_index, + params.ref_data_genome_bwa_index_image, + params.ref_data_genome_bwa_index_bseq, + params.ref_data_genome_bwa_index_biidx, + params.ref_data_genome_gridss_index, + params.ref_data_virusbreakenddb_path, ] - params.hmf_data_paths[params.ref_data.genome_version] + params.hmf_data_paths[params.ref_data_genome_version] .each { k, v -> - fps << "${params.hmf_data_path.replaceAll('/$', '')}/${v}" + fps << "${params.ref_data_hmf_data_path.replaceAll('/$', '')}/${v}" } if(params.panel !== null) { - params.panel_data_paths[params.panel][params.ref_data.genome_version] + params.panel_data_paths[params.panel][params.ref_data_genome_version] .each { k, v -> - fps << "${params.panel_data_path.replaceAll('/$', '')}/${v}" + fps << "${params.ref_data_panel_data_path.replaceAll('/$', '')}/${v}" } } diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 6e9cb85c..91a2f979 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -15,34 +15,34 @@ class WorkflowMain { // Set defaults common to all run configuration - if (params.genome_version !== null) { - params.ref_data.genome_version = params.genome_version.toString() - } else if (Constants.GENOMES_VERSION_37.contains(params.genome)) { - params.ref_data.genome_version = '37' - } else if (Constants.GENOMES_VERSION_38.contains(params.genome)) { - params.ref_data.genome_version = '38' - } else { - default_invalid = true + if (!params.containsKey('ref_data_genome_version')) { + if 
(Constants.GENOMES_VERSION_37.contains(params.genome)) { + params.ref_data_genome_version = '37' + } else if (Constants.GENOMES_VERSION_38.contains(params.genome)) { + params.ref_data_genome_version = '38' + } else { + default_invalid = true + } } - if (params.genome_type !== null) { - params.ref_data.genome_type = params.genome_type - } else if (Constants.GENOMES_ALT.contains(params.genome)) { - params.ref_data.genome_type = 'alt' - } else if (Constants.GENOMES_DEFINED.contains(params.genome)) { - params.ref_data.genome_type = 'no_alt' - } else { - default_invalid = true + if (!params.containsKey('ref_data_genome_type')) { + if (Constants.GENOMES_ALT.contains(params.genome)) { + params.ref_data_genome_type = 'alt' + } else if (Constants.GENOMES_DEFINED.contains(params.genome)) { + params.ref_data_genome_type = 'no_alt' + } else { + default_invalid = true + } } - if (params.hmf_data_path !== null) { - params.ref_data.hmf_data_path = params.hmf_data_path - } else if (params.ref_data.genome_version == '37') { - params.ref_data.hmf_data_path = Constants.HMF_DATA_37_PATH - } else if (params.ref_data.genome_version == '38') { - params.ref_data.hmf_data_path = Constants.HMF_DATA_38_PATH - } else { - default_invalid = true + if (!params.containsKey('ref_data_hmf_data_path')) { + if (params.ref_data_genome_version == '37') { + params.ref_data_hmf_data_path = Constants.HMF_DATA_37_PATH + } else if (params.ref_data_genome_version == '38') { + params.ref_data_hmf_data_path = Constants.HMF_DATA_38_PATH + } else { + default_invalid = true + } } // Bad configuration, catch in validateParams @@ -64,13 +64,11 @@ class WorkflowMain { // Attempt to set default panel data path; make no assumption on valid 'panel' value - if (params.panel_data_path !== null) { - params.ref_data.panel_data_path = params.panel_data_path - } else if (params.panel !== null ) { - if (params.panel == 'tso500' && params.genome_version == '37') { - params.ref_data.panel_data_path = Constants.TSO500_PANEL_37_PATH -
} else if (params.panel == 'tso500' && params.genome_version == '38') { - params.ref_data.panel_data_path = Constants.TSO500_PANEL_38_PATH + if (!params.containsKey('ref_data_panel_data_path') && params.containsKey('panel')) { + if (params.panel == 'tso500' && params.ref_data_genome_version == '37') { + params.ref_data_panel_data_path = Constants.TSO500_PANEL_37_PATH + } else if (params.panel == 'tso500' && params.ref_data_genome_version == '38') { + params.ref_data_panel_data_path = Constants.TSO500_PANEL_38_PATH } } } @@ -82,19 +80,13 @@ class WorkflowMain { log, ) - if (stages.virusinterpreter && run_mode === Constants.RunMode.WGTS) { - if (params.virusbreakenddb_path !== null) { - params.ref_data.virusbreakenddb_path = params.virusbreakenddb_path - } else { - params.ref_data.virusbreakenddb_path = Constants.VIRUSBREAKENDDB_PATH - } + if (!params.containsKey('ref_data_virusbreakenddb_path') && stages.virusinterpreter && run_mode === Constants.RunMode.WGTS){ + params.ref_data_virusbreakenddb_path = Constants.VIRUSBREAKENDDB_PATH } - if (stages.lilac) { - if (params.hla_slice_bed !== null) { - params.ref_data.hla_slice_bed = params.hla_slice_bed - } else if (params.genome_version == '38' && params.genome_type == 'alt') { - params.ref_data.hla_slice_bed = Constants.HLA_SLICE_BED_GRCH38_ALT_PATH + if (!params.containsKey('ref_data_hla_slice_bed') && stages.lilac) { + if (params.ref_data_genome_version == '38' && params.ref_data_genome_type == 'alt') { + params.ref_data_hla_slice_bed = Constants.HLA_SLICE_BED_GRCH38_ALT_PATH } } @@ -107,36 +99,36 @@ class WorkflowMain { // Common parameters - if (!params.ref_data.genome) { + if (!params.genome) { log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Genome must be set using the --genome CLI argument or in a configuration file.\n" + " Currently, the available genome are:\n" + " ${params.genomes.keySet().join(", ")}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" System.exit(1) - }
else if (!params.genomes.containsKey(params.ref_data.genome)) { + } else if (!params.genomes.containsKey(params.genome)) { log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.ref_data.genome}' not found in any config files provided to the pipeline.\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + " Currently, the available genome are:\n" + " ${params.genomes.keySet().join(", ")}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" System.exit(1) } - if (!Constants.GENOMES_SUPPORTED.contains(params.ref_data.genome)) { - if (!params.ref_data.force_genome) { - log.error "ERROR: currently only the GRCh37_hmf and GRCh38_hmf genomes are supported but got ${params.ref_data.genome}" + + if (!Constants.GENOMES_SUPPORTED.contains(params.genome)) { + if (!params.force_genome) { + log.error "ERROR: currently only the GRCh37_hmf and GRCh38_hmf genomes are supported but got ${params.genome}" + ", please adjust the --genome argument accordingly or override with --force_genome."
System.exit(1) } else { log.warn "currently only the GRCh37_hmf and GRCh38_hmf genomes are supported but forcing to " + - "proceed with \"${params.ref_data.genome}\"" + "proceed with \"${params.genome}\"" } } - if (!params.ref_data.genome_version) { + if (!params.ref_data_genome_version) { log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome version wasn't provided and genome '${params.ref_data.genome}' is not defined in \n" + + " Genome version wasn't provided and genome '${params.genome}' is not defined in \n" + " genome version list.\n" + " Currently, the list of genomes in the version list include:\n" + " ${Constants.GENOMES_DEFINED.join(", ")}\n" + @@ -144,9 +136,9 @@ class WorkflowMain { System.exit(1) } - if (!params.ref_data.genome_type) { + if (!params.ref_data_genome_type) { log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome type wasn't provided and genome '${params.ref_data.genome}' is not defined in \n" + + " Genome type wasn't provided and genome '${params.genome}' is not defined in \n" + " genome type list.\n" + " Currently, the list of genomes in the type list include:\n" + " ${Constants.GENOMES_DEFINED.join(", ")}\n" + @@ -154,7 +146,7 @@ class WorkflowMain { System.exit(1) } - if (!params.ref_data.hmf_data_path) { + if (!params.ref_data_hmf_data_path) { log.error "ERROR: HMF data path wasn't provided" System.exit(1) } diff --git a/main.nf b/main.nf index e01b75fd..029a77e2 100644 --- a/main.nf +++ b/main.nf @@ -31,21 +31,15 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_oncoanaly ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Reference config lifted up into params.ref_data to conform to standards/linting -params.ref_data = [ - genome: params.genome, - force_genome: params.force_genome, -] - -params.ref_data.genome_fasta = getGenomeAttribute('fasta') 
-params.ref_data.genome_fai = getGenomeAttribute('fai') -params.ref_data.genome_dict = getGenomeAttribute('dict') -params.ref_data.genome_bwa_index = getGenomeAttribute('bwa_index') -params.ref_data.genome_bwa_index_image = getGenomeAttribute('bwa_index_image') -params.ref_data.genome_bwa_index_bseq = getGenomeAttribute('bwa_index_bseq') -params.ref_data.genome_bwa_index_biidx = getGenomeAttribute('bwa_index_biidx') -params.ref_data.genome_gridss_index = getGenomeAttribute('gridss_index') -params.ref_data.genome_star_index = getGenomeAttribute('star_index') +params.ref_data_genome_fasta = getGenomeAttribute('fasta') +params.ref_data_genome_fai = getGenomeAttribute('fai') +params.ref_data_genome_dict = getGenomeAttribute('dict') +params.ref_data_genome_bwa_index = getGenomeAttribute('bwa_index') +params.ref_data_genome_bwa_index_image = getGenomeAttribute('bwa_index_image') +params.ref_data_genome_bwa_index_bseq = getGenomeAttribute('bwa_index_bseq') +params.ref_data_genome_bwa_index_biidx = getGenomeAttribute('bwa_index_biidx') +params.ref_data_genome_gridss_index = getGenomeAttribute('gridss_index') +params.ref_data_genome_star_index = getGenomeAttribute('star_index') WorkflowMain.setParamsDefaults(params, log) WorkflowMain.validateParams(params, log) diff --git a/modules/local/linx/somatic/main.nf b/modules/local/linx/somatic/main.nf index fcfbb5dc..a826e94d 100644 --- a/modules/local/linx/somatic/main.nf +++ b/modules/local/linx/somatic/main.nf @@ -13,7 +13,6 @@ process LINX_SOMATIC { path ensembl_data_resources path known_fusion_data path driver_gene_panel - path gene_id_file output: tuple val(meta), path('linx_somatic/'), emit: annotation_dir @@ -24,7 +23,6 @@ process LINX_SOMATIC { script: def args = task.ext.args ?: '' - def gene_id_file_arg = gene_id_file ? 
"-gene_id_file ${gene_id_file}" : '' """ linx \\ @@ -33,7 +31,6 @@ process LINX_SOMATIC { -sample ${meta.sample_id} \\ -sv_vcf ${purple_dir}/${meta.sample_id}.purple.sv.vcf.gz \\ -purple_dir ${purple_dir} \\ - ${gene_id_file_arg} \\ -ref_genome_version ${genome_ver} \\ -ensembl_data_dir ${ensembl_data_resources} \\ -known_fusion_file ${known_fusion_data} \\ diff --git a/modules/local/linx/somatic/meta.yml b/modules/local/linx/somatic/meta.yml index 2a232313..7e8694dd 100644 --- a/modules/local/linx/somatic/meta.yml +++ b/modules/local/linx/somatic/meta.yml @@ -33,10 +33,6 @@ input: type: file description: Driver Gene Panel file pattern: "*.{csv}" - - gene_id_file: - type: file - description: Ensembl gene ID file (optional) - pattern: "*.{csv}" output: - meta: type: map diff --git a/nextflow.config b/nextflow.config index 2076f74d..55eb5396 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,7 +14,6 @@ params { // Workflow mode mode = null - panel = null // Process configuration processes_manual = false @@ -23,28 +22,23 @@ params { // Reference genome information; iGenomes is effectively disabled but retained for linting genome = null - genome_version = null - genome_type = null force_genome = false - igenomes_base = 's3://ngi-igenomes/igenomes' - igenomes_ignore = false + igenomes_ignore = true // Other reference data and config exposed to user on CLI - hmf_data_path = null - panel_data_path = null - virusbreakenddb_path = null - hla_slice_bed = null + max_fastq_records = 10000000 - max_fastq_records = 10000000 + isofox_counts = null + isofox_gc_ratios = null + isofox_gene_ids = null + isofox_tpm_norm = null + isofox_read_length = null + isofox_functions = 'TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS;RETAINED_INTRONS' - isofox_counts = null - isofox_gc_ratios = null - isofox_gene_ids = null - isofox_tpm_norm = null - isofox_read_length = null + gridss_config = null - gridss_config = null + create_stub_placeholders = false // Boilerplate options outdir = 
null @@ -57,11 +51,6 @@ params { help = false version = false - // Other workflow inputs and options - create_stub_placeholders = false - linx_gene_id_file = null - isofox_functions = 'TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS;RETAINED_INTRONS' - // Config options config_profile_name = null config_profile_description = null @@ -76,10 +65,36 @@ params { max_cpus = 16 max_time = '240.h' + // Parameter lint ignore list + // NOTE(SW): entries here generally have conditional defaults or are for internal use only + lint_ignore = [ + 'lint_ignore', + 'genome_type', + 'genome_version', + 'genomes', + 'hmf_data_paths', + 'panel', + 'panel_data_paths', + 'ref_data', + 'ref_data_genome_bwa_index', + 'ref_data_genome_bwa_index_biidx', + 'ref_data_genome_bwa_index_bseq', + 'ref_data_genome_bwa_index_image', + 'ref_data_genome_dict', + 'ref_data_genome_fai', + 'ref_data_genome_fasta', + 'ref_data_genome_gridss_index', + 'ref_data_genome_star_index', + 'ref_data_hla_slice_bed', + 'ref_data_hmf_data_path', + 'ref_data_panel_data_path', + 'ref_data_virusbreakenddb_path', + ] + // Schema validation default options validationFailUnrecognisedParams = false validationLenientMode = true - validationSchemaIgnoreParams = 'genomes,igenomes_base,ref_data,hmf_data_paths,panel_data_paths' + validationSchemaIgnoreParams = "igenomes_base,${lint_ignore.join(',')}" validationShowHiddenParams = false validate_params = true diff --git a/nextflow_schema.json b/nextflow_schema.json index fcd6c864..d40e01af 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -91,11 +91,6 @@ "description": "Path to GRIDSS configuration file.", "fa_icon": "fas fa-cog" }, - "linx_gene_id_file": { - "type": "string", - "description": "Path to LINX gene ID file.", - "fa_icon": "fas fa-cog" - }, "isofox_read_length": { "description": "User defined RNA read length used for Isofox.", "fa_icon": "fas fa-cog" @@ -160,22 +155,22 @@ "hidden": true, "help_text": "Do not load `igenomes.config` when running the
pipeline. You may choose this option if you observe c lashes between custom parameters and those supplied in `igenomes.config`." }, - "hmf_data_path": { + "ref_data_hmf_data_path": { "type": "string", "description": "Path to HMF data.", "fa_icon": "far fa-folder-open" }, - "panel_data_path": { + "ref_data_panel_data_path": { "type": "string", "description": "Path to panel data.", "fa_icon": "far fa-folder-open" }, - "virusbreakenddb_path": { + "ref_data_virusbreakenddb_path": { "type": "string", "description": "Path to VIRUSBreakend database (directory or tarball).", "fa_icon": "far fa-file-code" }, - "hla_slice_bed": { + "ref_data_hla_slice_bed": { "format": "file-path", "pattern": "^\\S+\\.bed$", "description": "Path to HLA slice BED file.", diff --git a/subworkflows/local/linx_annotation/main.nf b/subworkflows/local/linx_annotation/main.nf index c8599584..6cf17747 100644 --- a/subworkflows/local/linx_annotation/main.nf +++ b/subworkflows/local/linx_annotation/main.nf @@ -19,7 +19,6 @@ workflow LINX_ANNOTATION { ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ known_fusion_data // channel: [mandatory] /path/to/known_fusion_data driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel - gene_id_file // channel: [optional] /path/to/linx_gene_id_file main: // Channel for versions.yml files @@ -128,7 +127,6 @@ workflow LINX_ANNOTATION { ensembl_data_resources, known_fusion_data, driver_gene_panel, - gene_id_file, ) ch_versions = ch_versions.mix(SOMATIC.out.versions) diff --git a/subworkflows/local/prepare_reference/main.nf b/subworkflows/local/prepare_reference/main.nf index 405c00a9..613929af 100644 --- a/subworkflows/local/prepare_reference/main.nf +++ b/subworkflows/local/prepare_reference/main.nf @@ -31,7 +31,7 @@ workflow PREPARE_REFERENCE { // // Set some variables for brevity // - ch_genome_fasta = file(params.ref_data.genome_fasta) + ch_genome_fasta = file(params.ref_data_genome_fasta) run_virusinterpreter = 
run_config.mode !== Constants.RunMode.TARGETED && run_config.stages.virusinterpreter // @@ -40,14 +40,14 @@ workflow PREPARE_REFERENCE { // The fai and dict files should always be present if using a genome preset. These are // always created where they are not present without checking processes to run given they // are used in numerous processes and have a neglibile cost to generate. - ch_genome_fai = params.ref_data.genome_fai ? file(params.ref_data.genome_fai) : [] - ch_genome_dict = params.ref_data.genome_dict ? file(params.ref_data.genome_dict) : [] - if (!params.ref_data.genome_fai) { + ch_genome_fai = params.ref_data_genome_fai ? file(params.ref_data_genome_fai) : [] + ch_genome_dict = params.ref_data_genome_dict ? file(params.ref_data_genome_dict) : [] + if (!params.ref_data_genome_fai) { SAMTOOLS_FAIDX([[:], ch_genome_fasta]) ch_genome_fai = SAMTOOLS_FAIDX.out.fai.map { meta, fai -> fai } ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) } - if (!params.ref_data.genome_dict) { + if (!params.ref_data_genome_dict) { SAMTOOLS_DICT([[:], ch_genome_fasta]) ch_genome_dict = SAMTOOLS_DICT.out.dict.map { meta, dict -> dict } ch_versions = ch_versions.mix(SAMTOOLS_DICT.out.versions) @@ -56,25 +56,25 @@ workflow PREPARE_REFERENCE { // // Create BWA index, BWA index image, and GRIDSS index for reference genome if required // - ch_genome_bwa_index = params.ref_data.genome_bwa_index ? file(params.ref_data.genome_bwa_index) : [] - ch_genome_bwa_index_image = params.ref_data.genome_gridss_index ? file(params.ref_data.genome_bwa_index_image) : [] - ch_genome_gridss_index = params.ref_data.genome_gridss_index ? file(params.ref_data.genome_gridss_index) : [] + ch_genome_bwa_index = params.ref_data_genome_bwa_index ? file(params.ref_data_genome_bwa_index) : [] + ch_genome_bwa_index_image = params.ref_data_genome_bwa_index_image ? file(params.ref_data_genome_bwa_index_image) : [] + ch_genome_gridss_index = params.ref_data_genome_gridss_index ?
file(params.ref_data_genome_gridss_index) : [] if (run_config.has_dna && (run_config.stages.gridss || run_virusinterpreter)) { // NOTE(SW): the BWA index directory can be provided as a compressed tarball - if (!params.ref_data.genome_bwa_index) { + if (!params.ref_data_genome_bwa_index) { BWA_INDEX([[:], ch_genome_fasta]) ch_genome_bwa_index = BWA_INDEX.out.index.map { meta, index -> index } ch_versions = ch_versions.mix(BWA_INDEX.out.versions) - } else if (params.ref_data.genome_bwa_index.endsWith('.tar.gz')) { + } else if (params.ref_data_genome_bwa_index.endsWith('.tar.gz')) { ch_genome_bwa_index_inputs = [ [id: 'bwa_index'], - file(params.ref_data.genome_bwa_index), + file(params.ref_data_genome_bwa_index), ] DECOMP_BWA_INDEX(ch_genome_bwa_index_inputs) ch_genome_bwa_index = DECOMP_BWA_INDEX.out.dir } - if (!params.ref_data.genome_bwa_index_image) { + if (!params.ref_data_genome_bwa_index_image) { GRIDSS_BWA_INDEX_IMAGE( ch_genome_fasta, ch_genome_fai, @@ -86,7 +86,7 @@ workflow PREPARE_REFERENCE { ch_genome_bwa_index_image = GRIDSS_BWA_INDEX_IMAGE.out.img ch_versions = ch_versions.mix(GRIDSS_BWA_INDEX_IMAGE.out.versions) } - if (!params.ref_data.genome_gridss_index) { + if (!params.ref_data_genome_gridss_index) { GRIDSS_INDEX( ch_genome_fasta, ch_genome_fai, @@ -101,17 +101,17 @@ workflow PREPARE_REFERENCE { } // Explicitly set BWA MEM2 index file inputs - ch_genome_bwa_index_bseq = file(params.ref_data.genome_bwa_index_bseq) - ch_genome_bwa_index_biidx = file(params.ref_data.genome_bwa_index_biidx) + ch_genome_bwa_index_bseq = file(params.ref_data_genome_bwa_index_bseq) + ch_genome_bwa_index_biidx = file(params.ref_data_genome_bwa_index_biidx) // // Decompress STAR index // - ch_genome_star_index = params.ref_data.genome_star_index ? file(params.ref_data.genome_star_index) : [] - if (run_config.has_rna_fastq && run_config.stages.alignment && params.ref_data.genome_star_index.endsWith('.tar.gz')) { + ch_genome_star_index = params.ref_data_genome_star_index ? 
file(params.ref_data_genome_star_index) : [] + if (run_config.has_rna_fastq && run_config.stages.alignment && params.ref_data_genome_star_index.endsWith('.tar.gz')) { ch_genome_star_index_inputs = [ [id: 'star_index'], - file(params.ref_data.genome_star_index), + file(params.ref_data_genome_star_index), ] DECOMP_STAR_INDEX(ch_genome_star_index_inputs) ch_genome_star_index = DECOMP_STAR_INDEX.out.dir @@ -122,26 +122,26 @@ workflow PREPARE_REFERENCE { // ch_virusbreakenddb = Channel.empty() if (run_config.has_dna && run_virusinterpreter) { - if (params.ref_data.virusbreakenddb_path.endsWith('.tar.gz')) { + if (params.ref_data_virusbreakenddb_path.endsWith('.tar.gz')) { ch_virusbreakenddb_inputs = [ [id: 'virusbreakenddb'], - file(params.ref_data.virusbreakenddb_path), + file(params.ref_data_virusbreakenddb_path), ] DECOMP_VIRUSBREAKEND_DB(ch_virusbreakenddb_inputs) ch_virusbreakenddb = DECOMP_VIRUSBREAKEND_DB.out.dir } else { - ch_virusbreakenddb = file(params.ref_data.virusbreakenddb_path) + ch_virusbreakenddb = file(params.ref_data_virusbreakenddb_path) } } // // Set HMF reference paths / stage, unpack if required // - hmf_data_paths = params.hmf_data_paths[params.ref_data.genome_version] - if (params.ref_data.hmf_data_path.endsWith('tar.gz')) { + hmf_data_paths = params.hmf_data_paths[params.ref_data_genome_version] + if (params.ref_data_hmf_data_path.endsWith('tar.gz')) { ch_hmf_data_inputs = [ [id: 'hmf_data'], - file(params.ref_data.hmf_data_path), + file(params.ref_data_hmf_data_path), ] DECOMP_HMF_DATA(ch_hmf_data_inputs) @@ -153,7 +153,7 @@ workflow PREPARE_REFERENCE { return createDataMap(hmf_data_paths, dirpath) } } else { - ch_hmf_data = createDataMap(hmf_data_paths, params.ref_data.hmf_data_path) + ch_hmf_data = createDataMap(hmf_data_paths, params.ref_data_hmf_data_path) } // @@ -165,12 +165,12 @@ workflow PREPARE_REFERENCE { // NOTE(SW): consider approach to implement custom panel support panel_data_paths_versions = 
params.panel_data_paths[params.panel] - panel_data_paths = panel_data_paths_versions[params.ref_data.genome_version] + panel_data_paths = panel_data_paths_versions[params.ref_data_genome_version] - if (params.ref_data.panel_data_path.endsWith('tar.gz')) { + if (params.ref_data_panel_data_path.endsWith('tar.gz')) { ch_panel_data_inputs = [ [id: 'panel_data'], - file(params.ref_data.panel_data_path), + file(params.ref_data_panel_data_path), ] DECOMP_PANEL_DATA(ch_panel_data_inputs) @@ -182,7 +182,7 @@ workflow PREPARE_REFERENCE { return createDataMap(panel_data_paths, dirpath) } } else { - ch_panel_data = createDataMap(panel_data_paths, params.ref_data.panel_data_path) + ch_panel_data = createDataMap(panel_data_paths, params.ref_data_panel_data_path) } } @@ -196,7 +196,7 @@ workflow PREPARE_REFERENCE { genome_bwa_index_image = ch_genome_bwa_index_image // path: genome_bwa_index_image genome_gridss_index = ch_genome_gridss_index // path: genome_gridss_index genome_star_index = ch_genome_star_index // path: genome_star_index - genome_version = params.ref_data.genome_version // val: genome_version + genome_version = params.ref_data_genome_version // val: genome_version virusbreakenddb = ch_virusbreakenddb // path: VIRUSBreakend database hmf_data = ch_hmf_data // map: HMF data paths diff --git a/tests/main.stub.nf.test b/tests/main.stub.nf.test index 3775d056..a4dc4a54 100644 --- a/tests/main.stub.nf.test +++ b/tests/main.stub.nf.test @@ -15,8 +15,8 @@ nextflow_pipeline { input "${baseDir}/tests/samplesheets/wgts.dna_rna.single.stub.csv" outdir "${outputDir}" - virusbreakenddb_path "${workDir}/reference_data/virusbreakenddb_20210401/" - hmf_data_path "${workDir}/reference_data/hmf_data/" + ref_data_virusbreakenddb_path "${workDir}/reference_data/virusbreakenddb_20210401/" + ref_data_hmf_data_path "${workDir}/reference_data/hmf_data/" max_cpus = 1 max_memory = "10.GB" @@ -44,9 +44,9 @@ nextflow_pipeline { input 
"${baseDir}/tests/samplesheets/targeted.tso500.dna_rna.single.stub.csv" outdir "${outputDir}" - virusbreakenddb_path "${workDir}/reference_data/virusbreakenddb_20210401/" - hmf_data_path "${workDir}/reference_data/hmf_data/" - panel_data_path "${workDir}/reference_data/panel_data/" + ref_data_virusbreakenddb_path "${workDir}/reference_data/virusbreakenddb_20210401/" + ref_data_hmf_data_path "${workDir}/reference_data/hmf_data/" + ref_data_panel_data_path "${workDir}/reference_data/panel_data/" max_cpus 1 max_memory "10.GB" diff --git a/workflows/targeted.nf b/workflows/targeted.nf index fc72b747..99dcc1c9 100644 --- a/workflows/targeted.nf +++ b/workflows/targeted.nf @@ -24,7 +24,6 @@ def checkPathParamList = [ params.isofox_gc_ratios, params.isofox_gene_ids, params.isofox_tpm_norm, - params.linx_gene_id_file, ] // Conditional requirements @@ -35,8 +34,8 @@ if (run_config.stages.gridss) { } if (run_config.stages.lilac) { - if (params.ref_data.genome_version == '38' && params.ref_data.genome_type == 'alt' && params.ref_data.containsKey('hla_slice_bed')) { - checkPathParamList.add(params.ref_data.hla_slice_bed) + if (params.ref_data_genome_version == '38' && params.ref_data_genome_type == 'alt' && params.containsKey('ref_data_hla_slice_bed')) { + checkPathParamList.add(params.ref_data_hla_slice_bed) } } @@ -46,9 +45,6 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true // Check mandatory parameters if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } -// Create Path objects for some input files -linx_gene_id_file = params.linx_gene_id_file ?
file(params.linx_gene_id_file) : [] - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS @@ -512,7 +508,6 @@ workflow TARGETED { hmf_data.ensembl_data_resources, hmf_data.known_fusion_data, panel_data.driver_gene_panel, - linx_gene_id_file, ) ch_versions = ch_versions.mix(LINX_ANNOTATION.out.versions) @@ -613,7 +608,7 @@ workflow TARGETED { if (run_config.stages.lilac) { // Set HLA slice BED if provided in params - ref_data_hla_slice_bed = params.ref_data.containsKey('hla_slice_bed') ? params.ref_data.hla_slice_bed : [] + ref_data_hla_slice_bed = params.containsKey('ref_data_hla_slice_bed') ? params.ref_data_hla_slice_bed : [] LILAC_CALLING( ch_inputs, diff --git a/workflows/wgts.nf b/workflows/wgts.nf index 832988d9..9385979b 100644 --- a/workflows/wgts.nf +++ b/workflows/wgts.nf @@ -21,7 +21,6 @@ Utils.validateInput(inputs, run_config, log) def checkPathParamList = [ params.isofox_counts, params.isofox_gc_ratios, - params.linx_gene_id_file, ] // Conditional requirements @@ -33,12 +32,12 @@ if (run_config.stages.gridss) { // Mode check required as evaluated regardless of workflow selection if (run_config.stages.virusinterpreter && run_config.mode !== Constants.RunMode.TARGETED) { - checkPathParamList.add(params.virusbreakenddb_path) + checkPathParamList.add(params.ref_data_virusbreakenddb_path) } if (run_config.stages.lilac) { - if (params.ref_data.genome_version == '38' && params.ref_data.genome_type == 'alt' && params.ref_data.containsKey('hla_slice_bed')) { - checkPathParamList.add(params.ref_data.hla_slice_bed) + if (params.ref_data_genome_version == '38' && params.ref_data_genome_type == 'alt' && params.containsKey('ref_data_hla_slice_bed')) { + checkPathParamList.add(params.ref_data_hla_slice_bed) } } @@ -48,9 +47,6 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true // Check mandatory parameters if (params.input) { ch_input = file(params.input)
} else { exit 1, 'Input samplesheet not specified!' } -// Create Path objects for some input files -linx_gene_id_file = params.linx_gene_id_file ? file(params.linx_gene_id_file) : [] - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS @@ -511,7 +507,6 @@ workflow WGTS { hmf_data.ensembl_data_resources, hmf_data.known_fusion_data, hmf_data.driver_gene_panel, - linx_gene_id_file, ) ch_versions = ch_versions.mix(LINX_ANNOTATION.out.versions) @@ -658,7 +653,7 @@ workflow WGTS { if (run_config.stages.lilac) { // Use HLA slice BED if provided in params or set as default requirement - ref_data_hla_slice_bed = params.ref_data.containsKey('hla_slice_bed') ? params.ref_data.hla_slice_bed : [] + ref_data_hla_slice_bed = params.containsKey('ref_data_hla_slice_bed') ? params.ref_data_hla_slice_bed : [] LILAC_CALLING( ch_inputs,