Skip to content

Commit

Permalink
Refactor reference data parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
scwatts committed Apr 30, 2024
1 parent 44cc52e commit 5ab10af
Show file tree
Hide file tree
Showing 14 changed files with 155 additions and 178 deletions.
2 changes: 1 addition & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@ params {
mode = 'wgts'
genome = 'GRCh38_hmf'

virusbreakenddb_path = 'https://pub-29f2e5b2b7384811bdbbcba44f8b5083.r2.dev/oncoanalyser/test_data/reference_data/virusbreakend/virusbreakenddb_test.tar.gz'
ref_data_virusbreakenddb_path = 'https://pub-29f2e5b2b7384811bdbbcba44f8b5083.r2.dev/oncoanalyser/test_data/reference_data/virusbreakend/virusbreakenddb_test.tar.gz'
}
6 changes: 3 additions & 3 deletions conf/test_stub.config
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ params {

}

virusbreakenddb_path = "temp/virusbreakenddb_20210401/"
hmf_data_path = "temp/hmf_bundle_38/"
panel_data_path = "temp/panel_bundle/tso500_38/"
ref_data_virusbreakenddb_path = "temp/virusbreakenddb_20210401/"
ref_data_hmf_data_path = "temp/hmf_bundle_38/"
ref_data_panel_data_path = "temp/panel_bundle/tso500_38/"

// Analysis config
mode = 'wgts'
Expand Down
26 changes: 13 additions & 13 deletions lib/Utils.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -196,26 +196,26 @@ class Utils {
public static void createStubPlaceholders(params) {

def fps = [
params.ref_data.genome_fasta,
params.ref_data.genome_fai,
params.ref_data.genome_dict,
params.ref_data.genome_bwa_index,
params.ref_data.genome_bwa_index_image,
params.ref_data.genome_bwa_index_bseq,
params.ref_data.genome_bwa_index_biidx,
params.ref_data.genome_gridss_index,
params.ref_data.virusbreakenddb_path,
params.ref_data_genome_fasta,
params.ref_data_genome_fai,
params.ref_data_genome_dict,
params.ref_data_genome_bwa_index,
params.ref_data_genome_bwa_index_image,
params.ref_data_genome_bwa_index_bseq,
params.ref_data_genome_bwa_index_biidx,
params.ref_data_genome_gridss_index,
params.ref_data_virusbreakenddb_path,
]

params.hmf_data_paths[params.ref_data.genome_version]
params.hmf_data_paths[params.ref_data_genome_version]
.each { k, v ->
fps << "${params.hmf_data_path.replaceAll('/$', '')}/${v}"
fps << "${params.ref_data_hmf_data_path.replaceAll('/$', '')}/${v}"
}

if(params.panel !== null) {
params.panel_data_paths[params.panel][params.ref_data.genome_version]
params.panel_data_paths[params.panel][params.ref_data_genome_version]
.each { k, v ->
fps << "${params.panel_data_path.replaceAll('/$', '')}/${v}"
fps << "${params.ref_data_panel_data_path.replaceAll('/$', '')}/${v}"
}
}

Expand Down
100 changes: 46 additions & 54 deletions lib/WorkflowMain.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -15,34 +15,34 @@ class WorkflowMain {

// Set defaults common to all run configuration

if (params.genome_version !== null) {
params.ref_data.genome_version = params.genome_version.toString()
} else if (Constants.GENOMES_VERSION_37.contains(params.genome)) {
params.ref_data.genome_version = '37'
} else if (Constants.GENOMES_VERSION_38.contains(params.genome)) {
params.ref_data.genome_version = '38'
} else {
default_invalid = true
if (!params.containsKey('ref_data_genome_version')) {
if (Constants.GENOMES_VERSION_37.contains(params.genome)) {
params.ref_data_genome_version = '37'
} else if (Constants.GENOMES_VERSION_38.contains(params.genome)) {
params.ref_data_genome_version = '38'
} else {
default_invalid = true
}
}

if (params.genome_type !== null) {
params.ref_data.genome_type = params.genome_type
} else if (Constants.GENOMES_ALT.contains(params.genome)) {
params.ref_data.genome_type = 'alt'
} else if (Constants.GENOMES_DEFINED.contains(params.genome)) {
params.ref_data.genome_type = 'no_alt'
} else {
default_invalid = true
if (!params.containsKey('ref_data_genome_type')) {
if (Constants.GENOMES_ALT.contains(params.genome)) {
params.ref_data_genome_type = 'alt'
} else if (Constants.GENOMES_DEFINED.contains(params.genome)) {
params.ref_data_genome_type = 'no_alt'
} else {
default_invalid = true
}
}

if (params.hmf_data_path !== null) {
params.ref_data.hmf_data_path = params.hmf_data_path
} else if (params.ref_data.genome_version == '37') {
params.ref_data.hmf_data_path = Constants.HMF_DATA_37_PATH
} else if (params.ref_data.genome_version == '38') {
params.ref_data.hmf_data_path = Constants.HMF_DATA_38_PATH
} else {
default_invalid = true
// Set the default HMF data path only when one has not already been configured.
// NOTE: the key checked here must be the same parameter that is assigned below
// ('ref_data_hmf_data_path'); checking any other key means the guard is always
// true and a configured path gets replaced by the default.
if (!params.containsKey('ref_data_hmf_data_path')) {
    if (params.ref_data_genome_version == '37') {
        params.ref_data_hmf_data_path = Constants.HMF_DATA_37_PATH
    } else if (params.ref_data_genome_version == '38') {
        params.ref_data_hmf_data_path = Constants.HMF_DATA_38_PATH
    } else {
        // No default is available for this genome version; flag the configuration as invalid
        default_invalid = true
    }
}

// Bad configuration, catch in validateParams
Expand All @@ -64,13 +64,11 @@ class WorkflowMain {

// Attempt to set default panel data path; make no assumption on valid 'panel' value

if (params.panel_data_path !== null) {
params.ref_data.panel_data_path = params.panel_data_path
} else if (params.panel !== null ) {
if (params.panel == 'tso500' && params.genome_version == '37') {
params.ref_data.panel_data_path = Constants.TSO500_PANEL_37_PATH
} else if (params.panel == 'tso500' && params.genome_version == '38') {
params.ref_data.panel_data_path = Constants.TSO500_PANEL_38_PATH
// Apply a default panel data path only when (a) no panel data path has been
// configured and (b) a panel has been selected. The body compares params.panel
// against known panel names, so it can only take effect when 'panel' is present.
if (!params.containsKey('ref_data_panel_data_path') && params.containsKey('panel')) {
    if (params.panel == 'tso500' && params.ref_data_genome_version == '37') {
        params.ref_data_panel_data_path = Constants.TSO500_PANEL_37_PATH
    } else if (params.panel == 'tso500' && params.ref_data_genome_version == '38') {
        params.ref_data_panel_data_path = Constants.TSO500_PANEL_38_PATH
    }
    // Any other panel value is intentionally left without a default here
}
}
Expand All @@ -82,19 +80,13 @@ class WorkflowMain {
log,
)

if (stages.virusinterpreter && run_mode === Constants.RunMode.WGTS) {
if (params.virusbreakenddb_path !== null) {
params.ref_data.virusbreakenddb_path = params.virusbreakenddb_path
} else {
params.ref_data.virusbreakenddb_path = Constants.VIRUSBREAKENDDB_PATH
}
if (!params.containsKey('ref_data_virusbreakenddb_path') && stages.virusinterpreter && run_mode === Constants.RunMode.WGTS){
params.ref_data_virusbreakenddb_path = Constants.VIRUSBREAKENDDB_PATH
}

if (stages.lilac) {
if (params.hla_slice_bed !== null) {
params.ref_data.hla_slice_bed = params.hla_slice_bed
} else if (params.genome_version == '38' && params.genome_type == 'alt') {
params.ref_data.hla_slice_bed = Constants.HLA_SLICE_BED_GRCH38_ALT_PATH
if (!params.containsKey('ref_data_hla_slice_bed') && stages.lilac) {
if (params.ref_data_genome_version == '38' && params.ref_data_genome_type == 'alt') {
params.ref_data_hla_slice_bed = Constants.HLA_SLICE_BED_GRCH38_ALT_PATH
}
}

Expand All @@ -107,54 +99,54 @@ class WorkflowMain {

// Common parameters

if (!params.ref_data.genome) {
if (!params.genome) {
log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" Genome must be set using the --genome CLI argument or in a configuration file.\n" +
" Currently, the available genome are:\n" +
" ${params.genomes.keySet().join(", ")}\n" +
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
System.exit(1)
} else if (!params.genomes.containsKey(params.ref_data.genome)) {
} else if (!params.genomes.containsKey(params.genome)) {
log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" Genome '${params.ref_data.genome}' not found in any config files provided to the pipeline.\n" +
" Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
" Currently, the available genome are:\n" +
" ${params.genomes.keySet().join(", ")}\n" +
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
System.exit(1)
}

if (!Constants.GENOMES_SUPPORTED.contains(params.ref_data.genome)) {
if (!params.ref_data.force_genome) {
log.error "ERROR: currently only the GRCh37_hmf and GRCh38_hmf genomes are supported but got ${params.ref_data.genome}" +
if (!Constants.GENOMES_SUPPORTED.contains(params.genome)) {
if (!params.ref_data_force_genome) {
log.error "ERROR: currently only the GRCh37_hmf and GRCh38_hmf genomes are supported but got ${params.genome}" +
", please adjust the --genome argument accordingly or override with --force_genome."
System.exit(1)
} else {
log.warn "currently only the GRCh37_hmf and GRCh38_hmf genomes are supported but forcing to " +
"proceed with \"${params.ref_data.genome}\""
"proceed with \"${params.genome}\""
}
}

if (!params.ref_data.genome_version) {
if (!params.ref_data_genome_version) {
log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" Genome version wasn't provided and genome '${params.ref_data.genome}' is not defined in \n" +
" Genome version wasn't provided and genome '${params.genome}' is not defined in \n" +
" genome version list.\n" +
" Currently, the list of genomes in the version list include:\n" +
" ${Constants.GENOMES_DEFINED.join(", ")}\n" +
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
System.exit(1)
}

if (!params.ref_data.genome_type) {
if (!params.ref_data_genome_type) {
log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" Genome type wasn't provided and genome '${params.ref_data.genome}' is not defined in \n" +
" Genome type wasn't provided and genome '${params.genome}' is not defined in \n" +
" genome type list.\n" +
" Currently, the list of genomes in the type list include:\n" +
" ${Constants.GENOMES_DEFINED.join(", ")}\n" +
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
System.exit(1)
}

if (!params.ref_data.hmf_data_path) {
if (!params.ref_data_hmf_data_path) {
log.error "ERROR: HMF data path wasn't provided"
System.exit(1)
}
Expand Down
24 changes: 9 additions & 15 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,15 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_oncoanaly
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// Reference config lifted up into params.ref_data to conform to standards/linting
params.ref_data = [
genome: params.genome,
force_genome: params.force_genome,
]

params.ref_data.genome_fasta = getGenomeAttribute('fasta')
params.ref_data.genome_fai = getGenomeAttribute('fai')
params.ref_data.genome_dict = getGenomeAttribute('dict')
params.ref_data.genome_bwa_index = getGenomeAttribute('bwa_index')
params.ref_data.genome_bwa_index_image = getGenomeAttribute('bwa_index_image')
params.ref_data.genome_bwa_index_bseq = getGenomeAttribute('bwa_index_bseq')
params.ref_data.genome_bwa_index_biidx = getGenomeAttribute('bwa_index_biidx')
params.ref_data.genome_gridss_index = getGenomeAttribute('gridss_index')
params.ref_data.genome_star_index = getGenomeAttribute('star_index')
params.ref_data_genome_fasta = getGenomeAttribute('fasta')
params.ref_data_genome_fai = getGenomeAttribute('fai')
params.ref_data_genome_dict = getGenomeAttribute('dict')
params.ref_data_genome_bwa_index = getGenomeAttribute('bwa_index')
params.ref_data_genome_bwa_index_image = getGenomeAttribute('bwa_index_image')
params.ref_data_genome_bwa_index_bseq = getGenomeAttribute('bwa_index_bseq')
params.ref_data_genome_bwa_index_biidx = getGenomeAttribute('bwa_index_biidx')
params.ref_data_genome_gridss_index = getGenomeAttribute('gridss_index')
params.ref_data_genome_star_index = getGenomeAttribute('star_index')

WorkflowMain.setParamsDefaults(params, log)
WorkflowMain.validateParams(params, log)
Expand Down
3 changes: 0 additions & 3 deletions modules/local/linx/somatic/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ process LINX_SOMATIC {
path ensembl_data_resources
path known_fusion_data
path driver_gene_panel
path gene_id_file

output:
tuple val(meta), path('linx_somatic/'), emit: annotation_dir
Expand All @@ -24,7 +23,6 @@ process LINX_SOMATIC {

script:
def args = task.ext.args ?: ''
def gene_id_file_arg = gene_id_file ? "-gene_id_file ${gene_id_file}" : ''

"""
linx \\
Expand All @@ -33,7 +31,6 @@ process LINX_SOMATIC {
-sample ${meta.sample_id} \\
-sv_vcf ${purple_dir}/${meta.sample_id}.purple.sv.vcf.gz \\
-purple_dir ${purple_dir} \\
${gene_id_file_arg} \\
-ref_genome_version ${genome_ver} \\
-ensembl_data_dir ${ensembl_data_resources} \\
-known_fusion_file ${known_fusion_data} \\
Expand Down
4 changes: 0 additions & 4 deletions modules/local/linx/somatic/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,6 @@ input:
type: file
description: Driver Gene Panel file
pattern: "*.{csv}"
- gene_id_file:
type: file
description: Ensembl gene ID file (optional)
pattern: "*.{csv}"
output:
- meta:
type: map
Expand Down
59 changes: 37 additions & 22 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ params {

// Workflow mode
mode = null
panel = null

// Process configuration
processes_manual = false
Expand All @@ -23,28 +22,23 @@ params {

// Reference genome information; iGenomes is effectively disabled but retained for linting
genome = null
genome_version = null
genome_type = null
force_genome = false

igenomes_base = 's3://ngi-igenomes/igenomes'
igenomes_ignore = false
igenomes_ignore = true

// Other reference data and config exposed to user on CLI
hmf_data_path = null
panel_data_path = null
virusbreakenddb_path = null
hla_slice_bed = null
max_fastq_records = 10000000

max_fastq_records = 10000000
isofox_counts = null
isofox_gc_ratios = null
isofox_gene_ids = null
isofox_tpm_norm = null
isofox_read_length = null
isofox_functions = 'TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS;RETAINED_INTRONS'

isofox_counts = null
isofox_gc_ratios = null
isofox_gene_ids = null
isofox_tpm_norm = null
isofox_read_length = null
gridss_config = null

gridss_config = null
create_stub_placeholders = false

// Boilerplate options
outdir = null
Expand All @@ -57,11 +51,6 @@ params {
help = false
version = false

// Other workflow inputs and options
create_stub_placeholders = false
linx_gene_id_file = null
isofox_functions = 'TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS;RETAINED_INTRONS'

// Config options
config_profile_name = null
config_profile_description = null
Expand All @@ -76,10 +65,36 @@ params {
max_cpus = 16
max_time = '240.h'

// Parameter lint ignore list
// NOTE(SW): entries here generally have conditional defaults or are for internal use only
lint_ignore = [
'lint_ignore',
'genome_type',
'genome_version',
'genomes',
'hmf_data_paths',
'panel',
'panel_data_paths',
'ref_data',
'ref_data_genome_bwa_index',
'ref_data_genome_bwa_index_biidx',
'ref_data_genome_bwa_index_bseq',
'ref_data_genome_bwa_index_image',
'ref_data_genome_dict',
'ref_data_genome_fai',
'ref_data_genome_fasta',
'ref_data_genome_gridss_index',
'ref_data_genome_star_index',
'ref_data_hla_slice_bed',
'ref_data_hmf_data_path',
'ref_data_panel_data_path',
'ref_data_virusbreakenddb_path',
]

// Schema validation default options
validationFailUnrecognisedParams = false
validationLenientMode = true
validationSchemaIgnoreParams = 'genomes,igenomes_base,ref_data,hmf_data_paths,panel_data_paths'
validationSchemaIgnoreParams = "igenomes_base,${lint_ignore.join(',')}"
validationShowHiddenParams = false
validate_params = true

Expand Down
Loading

0 comments on commit 5ab10af

Please sign in to comment.