Skip to content

Commit

Permalink
Merge pull request #25 from uclahs-cds/yashpatel-parameterize-base-re…
Browse files Browse the repository at this point in the history
…source-allocations

Parameterize base resource allocations
  • Loading branch information
yashpatel6 authored Aug 1, 2023
2 parents 4f9e28e + 9558fd6 commit 836d113
Show file tree
Hide file tree
Showing 7 changed files with 150 additions and 2 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
---

## [Unreleased]
### [Added]
- Custom resource allocation updates through configuration parameters

---

Expand Down
48 changes: 48 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,54 @@ For normal-only or tumour-only samples, exclude the fields for the other state.
| `docker_container_registry` | optional | string | Registry containing tool Docker images. Default: `ghcr.io/uclahs-cds` |
| `metapipeline_delete_input_bams` | optional | boolean | Set to true to delete the input BAM files once the initial processing step is complete. **WARNING**: This option should NOT be used for individual runs of call-gSNP; it's intended for metapipeline-DNA to optimize disk space usage by removing files that are no longer needed from the `workDir`. |
| `metapipeline_final_output_dir` | optional | string | Absolute path for the final output directory of metapipeline-DNA that's expected to contain the output BAM from align-DNA. **WARNING**: This option should not be used for individual runs of call-gSNP; it's intended for metapipeline-DNA to optimize disk space usage. |
| `base_resource_update` | optional | namespace | Namespace of parameters to update base resource allocations in the pipeline. Usage and structure are detailed in `template.config` and below. |

#### Base resource allocation updaters
To update the base resource (cpus or memory) allocations for processes, use the following structure and add the necessary parts. The default allocations can be found in the [node-specific config files](./config/)
```Nextflow
base_resource_update {
memory = [
[['process_name', 'process_name2'], <multiplier for resource>],
[['process_name3', 'process_name4'], <different multiplier for resource>]
]
cpus = [
[['process_name', 'process_name2'], <multiplier for resource>],
[['process_name3', 'process_name4'], <different multiplier for resource>]
]
}
```
> **Note** Resource updates will be applied in the order they're provided so if a process is included twice in the memory list, it will be updated twice in the order it's given.
Examples:

- To double memory of all processes:
```Nextflow
base_resource_update {
memory = [
[[], 2]
]
}
```
- To double memory for `run_ApplyBQSR_GATK` and triple memory for `run_validate_PipeVal` and `run_IndelRealigner_GATK`:
```Nextflow
base_resource_update {
memory = [
['run_ApplyBQSR_GATK', 2],
[['run_validate_PipeVal', 'run_IndelRealigner_GATK'], 3]
]
}
```
- To double CPUs and memory for `run_ApplyBQSR_GATK` and double memory for `run_validate_PipeVal`:
```Nextflow
base_resource_update {
cpus = [
['run_ApplyBQSR_GATK', 2]
]
memory = [
[['run_ApplyBQSR_GATK', 'run_validate_PipeVal'], 2]
]
}
```

---

Expand Down
70 changes: 69 additions & 1 deletion config/custom_schema_types.config
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ custom_schema_types {
'normal',
'tumor'
]
allowed_resource_types = [
'memory',
'cpus'
]

/**
* Check that input types are in allowed list
Expand All @@ -21,6 +25,13 @@ custom_schema_types {
}
}

/**
* Check if input is a String or GString
*/
is_string = { val ->
return (val in String || val in GString)
}

/**
* Check if given input is a Namespace
*/
Expand All @@ -39,6 +50,32 @@ custom_schema_types {
}
}

/**
* Check if given input is a number
*/
check_if_number = { val, String name ->
if (!(val in Integer || val in Float)) {
throw new Exception("${name} should be an Integer or Float, not ${val.getClass()}")
}
}

/**
* Check if given input is valid process list
*/
check_if_process_list = { val, String name ->
if (!custom_schema_types.is_string(val)) {
try {
custom_schema_types.check_if_list(val, name)
} catch(Exception e) {
throw new Exception("${name} should be either a string or a list. Please provide valid input.")
}
}

if (val.isEmpty()) {
throw new Exception("Empty string specified for ${name}. Please provide valid input.")
}
}

/**
* Check that input is namespace of expected types
*/
Expand Down Expand Up @@ -73,6 +110,24 @@ custom_schema_types {
}
}

/**
* Check namespace for resource updates
*/
check_resource_update_namespace = { Map options, String name, Map properties ->
custom_schema_types.check_if_namespace(options[name], name)
def given_keys = options[name].keySet() as ArrayList
if (given_keys.size() <= 0) {
return
}
custom_schema_types.check_input_type_keys(given_keys, name, custom_schema_types.allowed_resource_types)

options[name].each { entry ->
def entry_as_map = [:]
entry_as_map[entry.key] = entry.value
schema.validate_parameter(entry_as_map, entry.key, properties.elements[entry.key])
}
}

/**
* Check if proper BAM entry list
*/
Expand All @@ -83,6 +138,17 @@ custom_schema_types {
}
}

/**
* Check list of resource updates
*/
check_resource_update_list = { Map options, String name, Map properties ->
custom_schema_types.check_if_list(options[name], name)
for (item in options[name]) {
custom_schema_types.check_if_process_list(item[0], name)
custom_schema_types.check_if_number(item[1], name)
}
}

/**
* Check aligner and version
*/
Expand All @@ -97,6 +163,8 @@ custom_schema_types {
'InputNamespace': custom_schema_types.check_input_namespace,
'InputBAMNamespace': custom_schema_types.check_bam_namespace,
'BAMEntryList': custom_schema_types.check_bam_list,
'AlignerTool': custom_schema_types.check_aligner
'AlignerTool': custom_schema_types.check_aligner,
'ResourceUpdateNamespace': custom_schema_types.check_resource_update_namespace,
'ResourceUpdateList': custom_schema_types.check_resource_update_list
]
}
14 changes: 14 additions & 0 deletions config/methods.config
Original file line number Diff line number Diff line change
Expand Up @@ -176,12 +176,26 @@ methods {
}
}

modify_base_allocations = {
if (!(params.containsKey('base_resource_update') && params.base_resource_update)) {
return
}

params.base_resource_update.each { resource, updates ->
updates.each { processes, multiplier ->
def processes_to_update = (custom_schema_types.is_string(processes)) ? [processes] : processes
methods.update_base_resource_allocation(resource, multiplier, processes_to_update)
}
}
}

setup = {
methods.set_env()
schema.load_custom_types("${projectDir}/config/custom_schema_types.config")
schema.validate()
methods.set_ids_from_bams()
methods.set_allocation()
methods.modify_base_allocations()
methods.set_log_output_dir()
methods.set_output_dir()
methods.set_pipeline_logs()
Expand Down
13 changes: 13 additions & 0 deletions config/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,19 @@ metapipeline_final_output_dir:
type: 'String'
required: false
help: 'Directory containing final outputs to check before input deletion'
base_resource_update:
type: 'ResourceUpdateNamespace'
required: false
help: 'User-defined modifications for adjusting base resource allocations for processes'
elements:
memory:
type: 'ResourceUpdateList'
required: false
help: 'List of memory updates'
cpus:
type: 'ResourceUpdateList'
required: false
help: 'List of CPU updates'
input:
type: 'InputNamespace'
required: true
Expand Down
3 changes: 3 additions & 0 deletions config/template.config
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ params {
bundle_known_indels_vcf_gz = "/hot/ref/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz"
bundle_v0_dbsnp138_vcf_gz = "/hot/ref/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz"
bundle_contest_hapmap_3p3_vcf_gz = "/hot/ref/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz"

// Base resource allocation updater
// See README for adding parameters to update the base resource allocations
}

// Setup the pipeline config. DO NOT REMOVE THIS LINE!
Expand Down
2 changes: 1 addition & 1 deletion external/pipeline-Nextflow-config

0 comments on commit 836d113

Please sign in to comment.