From 5ab10af8bbe8116846c09fadc1c9736c5202bf31 Mon Sep 17 00:00:00 2001
From: Stephen Watts <hello@stephen.ac>
Date: Wed, 24 Apr 2024 17:19:56 +1000
Subject: [PATCH] Refactor reference data parameters

---
 conf/test.config                             |   2 +-
 conf/test_stub.config                        |   6 +-
 lib/Utils.groovy                             |  26 ++---
 lib/WorkflowMain.groovy                      | 100 +++++++++----------
 main.nf                                      |  24 ++---
 modules/local/linx/somatic/main.nf           |   3 -
 modules/local/linx/somatic/meta.yml          |   4 -
 nextflow.config                              |  59 +++++++----
 nextflow_schema.json                         |  13 +--
 subworkflows/local/linx_annotation/main.nf   |   2 -
 subworkflows/local/prepare_reference/main.nf |  60 +++++------
 tests/main.stub.nf.test                      |  10 +-
 workflows/targeted.nf                        |  11 +-
 workflows/wgts.nf                            |  13 +--
 14 files changed, 155 insertions(+), 178 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index d9c81020..473d6a44 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -29,5 +29,5 @@ params {
     mode   = 'wgts'
     genome = 'GRCh38_hmf'
 
-    virusbreakenddb_path = 'https://pub-29f2e5b2b7384811bdbbcba44f8b5083.r2.dev/oncoanalyser/test_data/reference_data/virusbreakend/virusbreakenddb_test.tar.gz'
+    ref_data_virusbreakenddb_path = 'https://pub-29f2e5b2b7384811bdbbcba44f8b5083.r2.dev/oncoanalyser/test_data/reference_data/virusbreakend/virusbreakenddb_test.tar.gz'
 }
diff --git a/conf/test_stub.config b/conf/test_stub.config
index a19120f3..9a068d4a 100644
--- a/conf/test_stub.config
+++ b/conf/test_stub.config
@@ -43,9 +43,9 @@ params {
 
     }
 
-    virusbreakenddb_path = "temp/virusbreakenddb_20210401/"
-    hmf_data_path        = "temp/hmf_bundle_38/"
-    panel_data_path      = "temp/panel_bundle/tso500_38/"
+    ref_data_virusbreakenddb_path = "temp/virusbreakenddb_20210401/"
+    ref_data_hmf_data_path        = "temp/hmf_bundle_38/"
+    ref_data_panel_data_path      = "temp/panel_bundle/tso500_38/"
 
     // Analysis config
     mode   = 'wgts'
diff --git a/lib/Utils.groovy b/lib/Utils.groovy
index 7604a344..e8fd6338 100644
--- a/lib/Utils.groovy
+++ b/lib/Utils.groovy
@@ -196,26 +196,26 @@ class Utils {
     public static void createStubPlaceholders(params) {
 
         def fps = [
-            params.ref_data.genome_fasta,
-            params.ref_data.genome_fai,
-            params.ref_data.genome_dict,
-            params.ref_data.genome_bwa_index,
-            params.ref_data.genome_bwa_index_image,
-            params.ref_data.genome_bwa_index_bseq,
-            params.ref_data.genome_bwa_index_biidx,
-            params.ref_data.genome_gridss_index,
-            params.ref_data.virusbreakenddb_path,
+            params.ref_data_genome_fasta,
+            params.ref_data_genome_fai,
+            params.ref_data_genome_dict,
+            params.ref_data_genome_bwa_index,
+            params.ref_data_genome_bwa_index_image,
+            params.ref_data_genome_bwa_index_bseq,
+            params.ref_data_genome_bwa_index_biidx,
+            params.ref_data_genome_gridss_index,
+            params.ref_data_virusbreakenddb_path,
         ]
 
-        params.hmf_data_paths[params.ref_data.genome_version]
+        params.hmf_data_paths[params.ref_data_genome_version]
             .each { k, v ->
-                fps << "${params.hmf_data_path.replaceAll('/$', '')}/${v}"
+                fps << "${params.ref_data_hmf_data_path.replaceAll('/$', '')}/${v}"
             }
 
         if(params.panel !== null) {
-            params.panel_data_paths[params.panel][params.ref_data.genome_version]
+            params.panel_data_paths[params.panel][params.ref_data_genome_version]
                 .each { k, v ->
-                    fps << "${params.panel_data_path.replaceAll('/$', '')}/${v}"
+                    fps << "${params.ref_data_panel_data_path.replaceAll('/$', '')}/${v}"
                 }
         }
 
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
index 6e9cb85c..91a2f979 100755
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@@ -15,34 +15,34 @@ class WorkflowMain {
 
         // Set defaults common to all run configuration
 
-        if (params.genome_version !== null) {
-            params.ref_data.genome_version = params.genome_version.toString()
-        } else if (Constants.GENOMES_VERSION_37.contains(params.genome)) {
-            params.ref_data.genome_version = '37'
-        } else if (Constants.GENOMES_VERSION_38.contains(params.genome)) {
-            params.ref_data.genome_version = '38'
-        } else {
-            default_invalid = true
+        if (!params.containsKey('ref_data_genome_version')) {
+            if (Constants.GENOMES_VERSION_37.contains(params.genome)) {
+                params.ref_data_genome_version = '37'
+            } else if (Constants.GENOMES_VERSION_38.contains(params.genome)) {
+                params.ref_data_genome_version = '38'
+            } else {
+                default_invalid = true
+            }
         }
 
-        if (params.genome_type !== null) {
-            params.ref_data.genome_type = params.genome_type
-        } else if (Constants.GENOMES_ALT.contains(params.genome)) {
-            params.ref_data.genome_type = 'alt'
-        } else if (Constants.GENOMES_DEFINED.contains(params.genome)) {
-            params.ref_data.genome_type = 'no_alt'
-        } else {
-            default_invalid = true
+        if (!params.containsKey('ref_data_genome_type')) {
+            if (Constants.GENOMES_ALT.contains(params.genome)) {
+                params.ref_data_genome_type = 'alt'
+            } else if (Constants.GENOMES_DEFINED.contains(params.genome)) {
+                params.ref_data_genome_type = 'no_alt'
+            } else {
+                default_invalid = true
+            }
         }
 
-        if (params.hmf_data_path !== null) {
-            params.ref_data.hmf_data_path = params.hmf_data_path
-        } else if (params.ref_data.genome_version == '37') {
-            params.ref_data.hmf_data_path = Constants.HMF_DATA_37_PATH
-        } else if (params.ref_data.genome_version == '38') {
-            params.ref_data.hmf_data_path = Constants.HMF_DATA_38_PATH
-        } else {
-            default_invalid = true
+        if (!params.containsKey('ref_data_hmf_data_path')) {  // default only when no path was supplied
+            if (params.ref_data_genome_version == '37') {
+                params.ref_data_hmf_data_path = Constants.HMF_DATA_37_PATH
+            } else if (params.ref_data_genome_version == '38') {
+                params.ref_data_hmf_data_path = Constants.HMF_DATA_38_PATH
+            } else {
+                default_invalid = true
+            }
         }
 
         // Bad configuration, catch in validateParams
@@ -64,13 +64,11 @@ class WorkflowMain {
 
             // Attempt to set default panel data path; make no assumption on valid 'panel' value
 
-            if (params.panel_data_path !== null) {
-                params.ref_data.panel_data_path = params.panel_data_path
-            } else if (params.panel !== null ) {
-                if (params.panel == 'tso500' && params.genome_version == '37') {
-                    params.ref_data.panel_data_path = Constants.TSO500_PANEL_37_PATH
-                } else if (params.panel == 'tso500' && params.genome_version == '38') {
-                    params.ref_data.panel_data_path = Constants.TSO500_PANEL_38_PATH
+            if (!params.containsKey('ref_data_panel_data_path') && params.containsKey('panel')) {
+                if (params.panel == 'tso500' && params.ref_data_genome_version == '37') {
+                    params.ref_data_panel_data_path = Constants.TSO500_PANEL_37_PATH
+                } else if (params.panel == 'tso500' && params.ref_data_genome_version == '38') {
+                    params.ref_data_panel_data_path = Constants.TSO500_PANEL_38_PATH
                 }
             }
         }
@@ -82,19 +80,13 @@ class WorkflowMain {
             log,
         )
 
-        if (stages.virusinterpreter && run_mode === Constants.RunMode.WGTS) {
-            if (params.virusbreakenddb_path !== null) {
-                params.ref_data.virusbreakenddb_path = params.virusbreakenddb_path
-            } else {
-                params.ref_data.virusbreakenddb_path = Constants.VIRUSBREAKENDDB_PATH
-            }
+        if (!params.containsKey('ref_data_virusbreakenddb_path') && stages.virusinterpreter && run_mode === Constants.RunMode.WGTS){
+            params.ref_data_virusbreakenddb_path = Constants.VIRUSBREAKENDDB_PATH
         }
 
-        if (stages.lilac) {
-            if (params.hla_slice_bed !== null) {
-                params.ref_data.hla_slice_bed = params.hla_slice_bed
-            } else if (params.genome_version == '38' && params.genome_type == 'alt') {
-                params.ref_data.hla_slice_bed = Constants.HLA_SLICE_BED_GRCH38_ALT_PATH
+        if (!params.containsKey('ref_data_hla_slice_bed') && stages.lilac) {
+            if (params.ref_data_genome_version == '38' && params.ref_data_genome_type == 'alt') {
+                params.ref_data_hla_slice_bed = Constants.HLA_SLICE_BED_GRCH38_ALT_PATH
             }
         }
 
@@ -107,36 +99,36 @@ class WorkflowMain {
 
         // Common parameters
 
-        if (!params.ref_data.genome) {
+        if (!params.genome) {
             log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
                 "  Genome must be set using the --genome CLI argument or in a configuration file.\n" +
                 "  Currently, the available genome are:\n" +
                 "  ${params.genomes.keySet().join(", ")}\n" +
                 "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
             System.exit(1)
-        } else if (!params.genomes.containsKey(params.ref_data.genome)) {
+        } else if (!params.genomes.containsKey(params.genome)) {
             log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
-                "  Genome '${params.ref_data.genome}' not found in any config files provided to the pipeline.\n" +
+                "  Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
                 "  Currently, the available genome are:\n" +
                 "  ${params.genomes.keySet().join(", ")}\n" +
                 "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
             System.exit(1)
         }
 
-        if (!Constants.GENOMES_SUPPORTED.contains(params.ref_data.genome)) {
-            if (!params.ref_data.force_genome) {
-                log.error "ERROR: currently only the GRCh37_hmf and GRCh38_hmf genomes are supported but got ${params.ref_data.genome}" +
+        if (!Constants.GENOMES_SUPPORTED.contains(params.genome)) {
+            if (!params.force_genome) {  // matches the --force_genome option named in the message below
+                log.error "ERROR: currently only the GRCh37_hmf and GRCh38_hmf genomes are supported but got ${params.genome}" +
                     ", please adjust the --genome argument accordingly or override with --force_genome."
                 System.exit(1)
             } else {
                 log.warn "currently only the GRCh37_hmf and GRCh38_hmf genomes are supported but forcing to " +
-                    "proceed with \"${params.ref_data.genome}\""
+                    "proceed with \"${params.genome}\""
             }
         }
 
-        if (!params.ref_data.genome_version) {
+        if (!params.ref_data_genome_version) {
             log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
-                "  Genome version wasn't provided and genome '${params.ref_data.genome}' is not defined in   \n" +
+                "  Genome version wasn't provided and genome '${params.genome}' is not defined in   \n" +
                 "  genome version list.\n" +
                 "  Currently, the list of genomes in the version list include:\n" +
                 "  ${Constants.GENOMES_DEFINED.join(", ")}\n" +
@@ -144,9 +136,9 @@ class WorkflowMain {
             System.exit(1)
         }
 
-        if (!params.ref_data.genome_type) {
+        if (!params.ref_data_genome_type) {
             log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
-                "  Genome type wasn't provided and genome '${params.ref_data.genome}' is not defined in      \n" +
+                "  Genome type wasn't provided and genome '${params.genome}' is not defined in      \n" +
                 "  genome type list.\n" +
                 "  Currently, the list of genomes in the type list include:\n" +
                 "  ${Constants.GENOMES_DEFINED.join(", ")}\n" +
@@ -154,7 +146,7 @@ class WorkflowMain {
             System.exit(1)
         }
 
-        if (!params.ref_data.hmf_data_path) {
+        if (!params.ref_data_hmf_data_path) {
             log.error "ERROR: HMF data path wasn't provided"
             System.exit(1)
         }
diff --git a/main.nf b/main.nf
index e01b75fd..029a77e2 100644
--- a/main.nf
+++ b/main.nf
@@ -31,21 +31,15 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_oncoanaly
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-// Reference config lifted up into params.ref_data to conform to standards/linting
-params.ref_data = [
-    genome: params.genome,
-    force_genome: params.force_genome,
-]
-
-params.ref_data.genome_fasta           = getGenomeAttribute('fasta')
-params.ref_data.genome_fai             = getGenomeAttribute('fai')
-params.ref_data.genome_dict            = getGenomeAttribute('dict')
-params.ref_data.genome_bwa_index       = getGenomeAttribute('bwa_index')
-params.ref_data.genome_bwa_index_image = getGenomeAttribute('bwa_index_image')
-params.ref_data.genome_bwa_index_bseq  = getGenomeAttribute('bwa_index_bseq')
-params.ref_data.genome_bwa_index_biidx = getGenomeAttribute('bwa_index_biidx')
-params.ref_data.genome_gridss_index    = getGenomeAttribute('gridss_index')
-params.ref_data.genome_star_index      = getGenomeAttribute('star_index')
+params.ref_data_genome_fasta           = getGenomeAttribute('fasta')
+params.ref_data_genome_fai             = getGenomeAttribute('fai')
+params.ref_data_genome_dict            = getGenomeAttribute('dict')
+params.ref_data_genome_bwa_index       = getGenomeAttribute('bwa_index')
+params.ref_data_genome_bwa_index_image = getGenomeAttribute('bwa_index_image')
+params.ref_data_genome_bwa_index_bseq  = getGenomeAttribute('bwa_index_bseq')
+params.ref_data_genome_bwa_index_biidx = getGenomeAttribute('bwa_index_biidx')
+params.ref_data_genome_gridss_index    = getGenomeAttribute('gridss_index')
+params.ref_data_genome_star_index      = getGenomeAttribute('star_index')
 
 WorkflowMain.setParamsDefaults(params, log)
 WorkflowMain.validateParams(params, log)
diff --git a/modules/local/linx/somatic/main.nf b/modules/local/linx/somatic/main.nf
index fcfbb5dc..a826e94d 100644
--- a/modules/local/linx/somatic/main.nf
+++ b/modules/local/linx/somatic/main.nf
@@ -13,7 +13,6 @@ process LINX_SOMATIC {
     path ensembl_data_resources
     path known_fusion_data
     path driver_gene_panel
-    path gene_id_file
 
     output:
     tuple val(meta), path('linx_somatic/'), emit: annotation_dir
@@ -24,7 +23,6 @@ process LINX_SOMATIC {
 
     script:
     def args = task.ext.args ?: ''
-    def gene_id_file_arg = gene_id_file ? "-gene_id_file ${gene_id_file}" : ''
 
     """
     linx \\
@@ -33,7 +31,6 @@ process LINX_SOMATIC {
         -sample ${meta.sample_id} \\
         -sv_vcf ${purple_dir}/${meta.sample_id}.purple.sv.vcf.gz \\
         -purple_dir ${purple_dir} \\
-        ${gene_id_file_arg} \\
         -ref_genome_version ${genome_ver} \\
         -ensembl_data_dir ${ensembl_data_resources} \\
         -known_fusion_file ${known_fusion_data} \\
diff --git a/modules/local/linx/somatic/meta.yml b/modules/local/linx/somatic/meta.yml
index 2a232313..7e8694dd 100644
--- a/modules/local/linx/somatic/meta.yml
+++ b/modules/local/linx/somatic/meta.yml
@@ -33,10 +33,6 @@ input:
       type: file
       description: Driver Gene Panel file
       pattern: "*.{csv}"
-  - gene_id_file:
-      type: file
-      description: Ensembl gene ID file (optional)
-      pattern: "*.{csv}"
 output:
   - meta:
       type: map
diff --git a/nextflow.config b/nextflow.config
index 2076f74d..55eb5396 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -14,7 +14,6 @@ params {
 
     // Workflow mode
     mode = null
-    panel = null
 
     // Process configuration
     processes_manual  = false
@@ -23,28 +22,23 @@ params {
 
     // Reference genome information; iGenomes is effectively disabled but retained for linting
     genome          = null
-    genome_version  = null
-    genome_type     = null
     force_genome    = false
-
     igenomes_base   = 's3://ngi-igenomes/igenomes'
-    igenomes_ignore = false
+    igenomes_ignore = true
 
     // Other reference data and config exposed to user on CLI
-    hmf_data_path        = null
-    panel_data_path      = null
-    virusbreakenddb_path = null
-    hla_slice_bed        = null
+    max_fastq_records        = 10000000
 
-    max_fastq_records    = 10000000
+    isofox_counts            = null
+    isofox_gc_ratios         = null
+    isofox_gene_ids          = null
+    isofox_tpm_norm          = null
+    isofox_read_length       = null
+    isofox_functions         = 'TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS;RETAINED_INTRONS'
 
-    isofox_counts        = null
-    isofox_gc_ratios     = null
-    isofox_gene_ids      = null
-    isofox_tpm_norm      = null
-    isofox_read_length   = null
+    gridss_config            = null
 
-    gridss_config        = null
+    create_stub_placeholders = false
 
     // Boilerplate options
     outdir           = null
@@ -57,11 +51,6 @@ params {
     help             = false
     version          = false
 
-    // Other workflow inputs and options
-    create_stub_placeholders = false
-    linx_gene_id_file        = null
-    isofox_functions         = 'TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS;RETAINED_INTRONS'
-
     // Config options
     config_profile_name        = null
     config_profile_description = null
@@ -76,10 +65,36 @@ params {
     max_cpus   = 16
     max_time   = '240.h'
 
+    // Parameter lint ignore list
+    // NOTE(SW): entries here generally have conditional defaults or are for internal use only
+    lint_ignore = [
+        'lint_ignore',
+        'genome_type',
+        'genome_version',
+        'genomes',
+        'hmf_data_paths',
+        'panel',
+        'panel_data_paths',
+        'ref_data',
+        'ref_data_genome_bwa_index',
+        'ref_data_genome_bwa_index_biidx',
+        'ref_data_genome_bwa_index_bseq',
+        'ref_data_genome_bwa_index_image',
+        'ref_data_genome_dict',
+        'ref_data_genome_fai',
+        'ref_data_genome_fasta',
+        'ref_data_genome_gridss_index',
+        'ref_data_genome_star_index',
+        'ref_data_hla_slice_bed',
+        'ref_data_hmf_data_path',
+        'ref_data_panel_data_path',
+        'ref_data_virusbreakenddb_path',
+    ]
+
     // Schema validation default options
     validationFailUnrecognisedParams = false
     validationLenientMode            = true
-    validationSchemaIgnoreParams     = 'genomes,igenomes_base,ref_data,hmf_data_paths,panel_data_paths'
+    validationSchemaIgnoreParams     = "igenomes_base,${lint_ignore.join(',')}"
     validationShowHiddenParams       = false
     validate_params                  = true
 
diff --git a/nextflow_schema.json b/nextflow_schema.json
index fcd6c864..d40e01af 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -91,11 +91,6 @@
                     "description": "Path to GRIDSS configuration file.",
                     "fa_icon": "fas fa-cog"
                 },
-                "linx_gene_id_file": {
-                    "type": "string",
-                    "description": "Path to LINX gene ID file.",
-                    "fa_icon": "fas fa-cog"
-                },
                 "isofox_read_length": {
                     "description": "User defined RNA read length used for Isofox.",
                     "fa_icon": "fas fa-cog"
@@ -160,22 +155,22 @@
                     "hidden": true,
                     "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe c lashes between custom parameters and those supplied in `igenomes.config`."
                 },
-                "hmf_data_path": {
+                "ref_data_hmf_data_path": {
                     "type": "string",
                     "description": "Path to HMF data.",
                     "fa_icon": "far fa-folder-open"
                 },
-                "panel_data_path": {
+                "ref_data_panel_data_path": {
                     "type": "string",
                     "description": "Path to panel data.",
                     "fa_icon": "far fa-folder-open"
                 },
-                "virusbreakenddb_path": {
+                "ref_data_virusbreakenddb_path": {
                     "type": "string",
                     "description": "Path to VIRUSBreakend database (directory or tarball).",
                     "fa_icon": "far fa-file-code"
                 },
-                "hla_slice_bed": {
+                "ref_data_hla_slice_bed": {
                     "format": "file-path",
                     "pattern": "^\\S+\\.bed$",
                     "description": "Path to HLA slice BED file.",
diff --git a/subworkflows/local/linx_annotation/main.nf b/subworkflows/local/linx_annotation/main.nf
index c8599584..6cf17747 100644
--- a/subworkflows/local/linx_annotation/main.nf
+++ b/subworkflows/local/linx_annotation/main.nf
@@ -19,7 +19,6 @@ workflow LINX_ANNOTATION {
         ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/
         known_fusion_data      // channel: [mandatory] /path/to/known_fusion_data
         driver_gene_panel      // channel: [mandatory] /path/to/driver_gene_panel
-        gene_id_file           // channel: [optional]  /path/to/linx_gene_id_file
 
     main:
         // Channel for versions.yml files
@@ -128,7 +127,6 @@ workflow LINX_ANNOTATION {
             ensembl_data_resources,
             known_fusion_data,
             driver_gene_panel,
-            gene_id_file,
         )
 
         ch_versions = ch_versions.mix(SOMATIC.out.versions)
diff --git a/subworkflows/local/prepare_reference/main.nf b/subworkflows/local/prepare_reference/main.nf
index 405c00a9..613929af 100644
--- a/subworkflows/local/prepare_reference/main.nf
+++ b/subworkflows/local/prepare_reference/main.nf
@@ -31,7 +31,7 @@ workflow PREPARE_REFERENCE {
         //
         // Set some variables for brevity
         //
-        ch_genome_fasta = file(params.ref_data.genome_fasta)
+        ch_genome_fasta = file(params.ref_data_genome_fasta)
         run_virusinterpreter = run_config.mode !== Constants.RunMode.TARGETED && run_config.stages.virusinterpreter
 
         //
@@ -40,14 +40,14 @@ workflow PREPARE_REFERENCE {
         // The fai and dict files should always be present if using a genome preset. These are
         // always created where they are not present without checking processes to run given they
         // are used in numerous processes and have a neglibile cost to generate.
-        ch_genome_fai = params.ref_data.genome_fai ? file(params.ref_data.genome_fai) : []
-        ch_genome_dict = params.ref_data.genome_dict ? file(params.ref_data.genome_dict) : []
-        if (!params.ref_data.genome_fai) {
+        ch_genome_fai = params.ref_data_genome_fai ? file(params.ref_data_genome_fai) : []
+        ch_genome_dict = params.ref_data_genome_dict ? file(params.ref_data_genome_dict) : []
+        if (!params.ref_data_genome_fai) {
             SAMTOOLS_FAIDX([[:], ch_genome_fasta])
             ch_genome_fai = SAMTOOLS_FAIDX.out.fai.map { meta, fai -> fai }
             ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions)
         }
-        if (!params.ref_data.genome_dict) {
+        if (!params.ref_data_genome_dict) {
             SAMTOOLS_DICT([[:], ch_genome_fasta])
             ch_genome_dict = SAMTOOLS_DICT.out.dict.map { meta, dict -> dict }
             ch_versions = ch_versions.mix(SAMTOOLS_DICT.out.versions)
@@ -56,25 +56,25 @@ workflow PREPARE_REFERENCE {
         //
         // Create BWA index, BWA index image, and GRIDSS index for reference genome if required
         //
-        ch_genome_bwa_index = params.ref_data.genome_bwa_index ? file(params.ref_data.genome_bwa_index) : []
-        ch_genome_bwa_index_image = params.ref_data.genome_gridss_index ? file(params.ref_data.genome_bwa_index_image) : []
-        ch_genome_gridss_index = params.ref_data.genome_gridss_index ? file(params.ref_data.genome_gridss_index) : []
+        ch_genome_bwa_index = params.ref_data_genome_bwa_index ? file(params.ref_data_genome_bwa_index) : []
+        ch_genome_bwa_index_image = params.ref_data_genome_bwa_index_image ? file(params.ref_data_genome_bwa_index_image) : []
+        ch_genome_gridss_index = params.ref_data_genome_gridss_index ? file(params.ref_data_genome_gridss_index) : []
         if (run_config.has_dna && (run_config.stages.gridss || run_virusinterpreter)) {
             // NOTE(SW): the BWA index directory can be provided as a compressed tarball
-            if (!params.ref_data.genome_bwa_index) {
+            if (!params.ref_data_genome_bwa_index) {
                 BWA_INDEX([[:], ch_genome_fasta])
                 ch_genome_bwa_index = BWA_INDEX.out.index.map { meta, index -> index }
                 ch_versions = ch_versions.mix(BWA_INDEX.out.versions)
-            } else if (params.ref_data.genome_bwa_index.endsWith('.tar.gz')) {
+            } else if (params.ref_data_genome_bwa_index.endsWith('.tar.gz')) {
                 ch_genome_bwa_index_inputs = [
                     [id: 'bwa_index'],
-                    file(params.ref_data.genome_bwa_index),
+                    file(params.ref_data_genome_bwa_index),
                 ]
                 DECOMP_BWA_INDEX(ch_genome_bwa_index_inputs)
                 ch_genome_bwa_index = DECOMP_BWA_INDEX.out.dir
             }
 
-            if (!params.ref_data.genome_bwa_index_image) {
+            if (!params.ref_data_genome_bwa_index_image) {
                 GRIDSS_BWA_INDEX_IMAGE(
                     ch_genome_fasta,
                     ch_genome_fai,
@@ -86,7 +86,7 @@ workflow PREPARE_REFERENCE {
                 ch_genome_bwa_index_image = GRIDSS_BWA_INDEX_IMAGE.out.img
                 ch_versions = ch_versions.mix(GRIDSS_BWA_INDEX_IMAGE.out.versions)
             }
-            if (!params.ref_data.genome_gridss_index) {
+            if (!params.ref_data_genome_gridss_index) {
                 GRIDSS_INDEX(
                     ch_genome_fasta,
                     ch_genome_fai,
@@ -101,17 +101,17 @@ workflow PREPARE_REFERENCE {
         }
 
         // Explicitly set BWA MEM2 index file inputs
-        ch_genome_bwa_index_bseq = file(params.ref_data.genome_bwa_index_bseq)
-        ch_genome_bwa_index_biidx = file(params.ref_data.genome_bwa_index_biidx)
+        ch_genome_bwa_index_bseq = file(params.ref_data_genome_bwa_index_bseq)
+        ch_genome_bwa_index_biidx = file(params.ref_data_genome_bwa_index_biidx)
 
         //
         // Decompress STAR index
         //
-        ch_genome_star_index = params.ref_data.genome_star_index ? file(params.ref_data.genome_star_index) : []
-        if (run_config.has_rna_fastq && run_config.stages.alignment && params.ref_data.genome_star_index.endsWith('.tar.gz')) {
+        ch_genome_star_index = params.ref_data_genome_star_index ? file(params.ref_data_genome_star_index) : []
+        if (run_config.has_rna_fastq && run_config.stages.alignment && params.ref_data_genome_star_index.endsWith('.tar.gz')) {
                 ch_genome_star_index_inputs = [
                     [id: 'star_index'],
-                    file(params.ref_data.genome_star_index),
+                    file(params.ref_data_genome_star_index),
                 ]
                 DECOMP_STAR_INDEX(ch_genome_star_index_inputs)
                 ch_genome_star_index = DECOMP_STAR_INDEX.out.dir
@@ -122,26 +122,26 @@ workflow PREPARE_REFERENCE {
         //
         ch_virusbreakenddb = Channel.empty()
         if (run_config.has_dna && run_virusinterpreter) {
-            if (params.ref_data.virusbreakenddb_path.endsWith('.tar.gz')) {
+            if (params.ref_data_virusbreakenddb_path.endsWith('.tar.gz')) {
                 ch_virusbreakenddb_inputs = [
                     [id: 'virusbreakenddb'],
-                    file(params.ref_data.virusbreakenddb_path),
+                    file(params.ref_data_virusbreakenddb_path),
                 ]
                 DECOMP_VIRUSBREAKEND_DB(ch_virusbreakenddb_inputs)
                 ch_virusbreakenddb = DECOMP_VIRUSBREAKEND_DB.out.dir
             } else {
-                ch_virusbreakenddb = file(params.ref_data.virusbreakenddb_path)
+                ch_virusbreakenddb = file(params.ref_data_virusbreakenddb_path)
             }
         }
 
         //
         // Set HMF reference paths / stage, unpack if required
         //
-        hmf_data_paths = params.hmf_data_paths[params.ref_data.genome_version]
-        if (params.ref_data.hmf_data_path.endsWith('tar.gz')) {
+        hmf_data_paths = params.hmf_data_paths[params.ref_data_genome_version]
+        if (params.ref_data_hmf_data_path.endsWith('tar.gz')) {
             ch_hmf_data_inputs = [
                 [id: 'hmf_data'],
-                file(params.ref_data.hmf_data_path),
+                file(params.ref_data_hmf_data_path),
             ]
             DECOMP_HMF_DATA(ch_hmf_data_inputs)
 
@@ -153,7 +153,7 @@ workflow PREPARE_REFERENCE {
                     return createDataMap(hmf_data_paths, dirpath)
                 }
         } else {
-            ch_hmf_data = createDataMap(hmf_data_paths, params.ref_data.hmf_data_path)
+            ch_hmf_data = createDataMap(hmf_data_paths, params.ref_data_hmf_data_path)
         }
 
         //
@@ -165,12 +165,12 @@ workflow PREPARE_REFERENCE {
             // NOTE(SW): consider approach to implement custom panel support
 
             panel_data_paths_versions = params.panel_data_paths[params.panel]
-            panel_data_paths = panel_data_paths_versions[params.ref_data.genome_version]
+            panel_data_paths = panel_data_paths_versions[params.ref_data_genome_version]
 
-            if (params.ref_data.panel_data_path.endsWith('tar.gz')) {
+            if (params.ref_data_panel_data_path.endsWith('tar.gz')) {
                 ch_panel_data_inputs = [
                     [id: 'panel_data'],
-                    file(params.ref_data.panel_data_path),
+                    file(params.ref_data_panel_data_path),
                 ]
                 DECOMP_PANEL_DATA(ch_panel_data_inputs)
 
@@ -182,7 +182,7 @@ workflow PREPARE_REFERENCE {
                         return createDataMap(panel_data_paths, dirpath)
                     }
             } else {
-                ch_panel_data = createDataMap(panel_data_paths, params.ref_data.panel_data_path)
+                ch_panel_data = createDataMap(panel_data_paths, params.ref_data_panel_data_path)
             }
         }
 
@@ -196,7 +196,7 @@ workflow PREPARE_REFERENCE {
         genome_bwa_index_image = ch_genome_bwa_index_image      // path: genome_bwa_index_image
         genome_gridss_index    = ch_genome_gridss_index         // path: genome_gridss_index
         genome_star_index      = ch_genome_star_index           // path: genome_star_index
-        genome_version         = params.ref_data.genome_version // val:  genome_version
+        genome_version         = params.ref_data_genome_version // val:  genome_version
 
         virusbreakenddb        = ch_virusbreakenddb             // path: VIRUSBreakend database
         hmf_data               = ch_hmf_data                    // map:  HMF data paths
diff --git a/tests/main.stub.nf.test b/tests/main.stub.nf.test
index 3775d056..a4dc4a54 100644
--- a/tests/main.stub.nf.test
+++ b/tests/main.stub.nf.test
@@ -15,8 +15,8 @@ nextflow_pipeline {
                 input "${baseDir}/tests/samplesheets/wgts.dna_rna.single.stub.csv"
                 outdir "${outputDir}"
 
-                virusbreakenddb_path "${workDir}/reference_data/virusbreakenddb_20210401/"
-                hmf_data_path "${workDir}/reference_data/hmf_data/"
+                ref_data_virusbreakenddb_path "${workDir}/reference_data/virusbreakenddb_20210401/"
+                ref_data_hmf_data_path "${workDir}/reference_data/hmf_data/"
 
                 max_cpus = 1
                 max_memory = "10.GB"
@@ -44,9 +44,9 @@ nextflow_pipeline {
                 input "${baseDir}/tests/samplesheets/targeted.tso500.dna_rna.single.stub.csv"
                 outdir "${outputDir}"
 
-                virusbreakenddb_path "${workDir}/reference_data/virusbreakenddb_20210401/"
-                hmf_data_path "${workDir}/reference_data/hmf_data/"
-                panel_data_path "${workDir}/reference_data/panel_data/"
+                ref_data_virusbreakenddb_path "${workDir}/reference_data/virusbreakenddb_20210401/"
+                ref_data_hmf_data_path "${workDir}/reference_data/hmf_data/"
+                ref_data_panel_data_path "${workDir}/reference_data/panel_data/"
 
                 max_cpus 1
                 max_memory "10.GB"
diff --git a/workflows/targeted.nf b/workflows/targeted.nf
index fc72b747..99dcc1c9 100644
--- a/workflows/targeted.nf
+++ b/workflows/targeted.nf
@@ -24,7 +24,6 @@ def checkPathParamList = [
     params.isofox_gc_ratios,
     params.isofox_gene_ids,
     params.isofox_tpm_norm,
-    params.linx_gene_id_file,
 ]
 
 // Conditional requirements
@@ -35,8 +34,8 @@ if (run_config.stages.gridss) {
 }
 
 if (run_config.stages.lilac) {
-    if (params.ref_data.genome_version == '38' && params.ref_data.genome_type == 'alt' && params.ref_data.containsKey('hla_slice_bed')) {
-        checkPathParamList.add(params.ref_data.hla_slice_bed)
+    if (params.ref_data_genome_version == '38' && params.ref_data_genome_type == 'alt' && params.containsKey('ref_data_hla_slice_bed')) {
+        checkPathParamList.add(params.ref_data_hla_slice_bed)
     }
 }
 
@@ -46,9 +45,6 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
 // Check mandatory parameters
 if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
 
-// Create Path objects for some input files
-linx_gene_id_file = params.linx_gene_id_file ? file(params.linx_gene_id_file) : []
-
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS
@@ -512,7 +508,6 @@ workflow TARGETED {
             hmf_data.ensembl_data_resources,
             hmf_data.known_fusion_data,
             panel_data.driver_gene_panel,
-            linx_gene_id_file,
         )
 
         ch_versions = ch_versions.mix(LINX_ANNOTATION.out.versions)
@@ -613,7 +608,7 @@ workflow TARGETED {
     if (run_config.stages.lilac) {
 
         // Set HLA slice BED if provided in params
-        ref_data_hla_slice_bed = params.ref_data.containsKey('hla_slice_bed') ? params.ref_data.hla_slice_bed : []
+        ref_data_hla_slice_bed = params.containsKey('ref_data_hla_slice_bed') ? params.ref_data_hla_slice_bed : []
 
         LILAC_CALLING(
             ch_inputs,
diff --git a/workflows/wgts.nf b/workflows/wgts.nf
index 832988d9..9385979b 100644
--- a/workflows/wgts.nf
+++ b/workflows/wgts.nf
@@ -21,7 +21,6 @@ Utils.validateInput(inputs, run_config, log)
 def checkPathParamList = [
     params.isofox_counts,
     params.isofox_gc_ratios,
-    params.linx_gene_id_file,
 ]
 
 // Conditional requirements
@@ -33,12 +32,12 @@ if (run_config.stages.gridss) {
 
 // Mode check required as evaluated regardless of workflow selection
 if (run_config.stages.virusinterpreter && run_config.mode !== Constants.RunMode.TARGETED) {
-    checkPathParamList.add(params.virusbreakenddb_path)
+    checkPathParamList.add(params.ref_data_virusbreakenddb_path)
 }
 
 if (run_config.stages.lilac) {
-    if (params.ref_data.genome_version == '38' && params.ref_data.genome_type == 'alt' && params.ref_data.containsKey('hla_slice_bed')) {
-        checkPathParamList.add(params.ref_data.hla_slice_bed)
+    if (params.ref_data_genome_version == '38' && params.ref_data_genome_type == 'alt' && params.containsKey('ref_data_hla_slice_bed')) {
+        checkPathParamList.add(params.ref_data_hla_slice_bed)
     }
 }
 
@@ -48,9 +47,6 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true
 // Check mandatory parameters
 if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
 
-// Create Path objects for some input files
-linx_gene_id_file = params.linx_gene_id_file ? file(params.linx_gene_id_file) : []
-
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS
@@ -511,7 +507,6 @@ workflow WGTS {
             hmf_data.ensembl_data_resources,
             hmf_data.known_fusion_data,
             hmf_data.driver_gene_panel,
-            linx_gene_id_file,
         )
 
         ch_versions = ch_versions.mix(LINX_ANNOTATION.out.versions)
@@ -658,7 +653,7 @@ workflow WGTS {
     if (run_config.stages.lilac) {
 
         // Use HLA slice BED if provided in params or set as default requirement
-        ref_data_hla_slice_bed = params.ref_data.containsKey('hla_slice_bed') ? params.ref_data.hla_slice_bed : []
+        ref_data_hla_slice_bed = params.containsKey('ref_data_hla_slice_bed') ? params.ref_data_hla_slice_bed : []
 
         LILAC_CALLING(
             ch_inputs,