diff --git a/.gitignore b/.gitignore index 2b3f79d..f5a7b5a 100644 --- a/.gitignore +++ b/.gitignore @@ -81,7 +81,3 @@ work/ *.tar *.zip -# Other -test/* -test/*/* -slurm-*.out diff --git a/CHANGELOG.md b/CHANGELOG.md index a45bf7e..0ace8af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [Unreleased] ### Added +- Add `mosdepth` quantize workflow - Add `mosdepth` coverage windows workflow - Add `FastQC` workflow - Add per readgroup and per library functionality diff --git a/README.md b/README.md index af2a9df..033537f 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ 9. [References](#references) ## Overview -This pipeline takes BAMs and runs selected Quality Control (QC) steps. Available algorithms are currently `SAMtools stats`, `Picard CollectWgsMetrics` and `Qualimap bamqc`. Generally either `Qualimap bamqc` or `SAMtools stats and Picard CollectWgsMetrics` should be run, not both. `Qualimap bamqc` uses a lot of memory and should not be run within `uclahs-cds/metapipeline-DNA`. Input can include any combination of tumor and normal BAMs from a single donor. Each will be processed independently. RNA specific QC is not yet implemented but is expected soon. +This pipeline takes BAMs and runs selected Quality Control (QC) steps. Available algorithms are currently `SAMtools stats`, `Picard CollectWgsMetrics`, `FastQC`, `Qualimap bamqc`, `mosdepth coverage` and `mosdepth quantize`. Generally either `Qualimap bamqc` or `SAMtools stats and Picard CollectWgsMetrics` should be run, not both. `Qualimap bamqc` uses a lot of memory and should not be run within `uclahs-cds/metapipeline-DNA`. Input can include any combination of tumor and normal BAMs from a single donor. Each will be processed independently. RNA specific QC is not yet implemented but is expected soon. 
--- @@ -51,9 +51,12 @@ Each of the below algorithms, if selected, will run in parallel subject to avail ### 4. FastQC [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/) aims to provide a QC report which can spot problems which originate either in the sequencer or in the starting library material. -### 5. mosdepth windows +### 5. mosdepth coverage [mosdepth](https://github.com/brentp/mosdepth) by windows provides fast BAM/CRAM depth calculation. +### 6. mosdepth quantize +[mosdepth](https://github.com/brentp/mosdepth) quantize creates a bed file labeling regions within specified coverage thresholds. It is similar to GATK's callable loci tool. + --- ## Inputs @@ -78,7 +81,7 @@ input: | Field | Type | Required | Description | | ----- | ---- | ------------ | ------------------------ | -| `algorithm` | list | no | List of tools to be run: ['fastqc', 'samtools_stats', 'collectwgsmetrics', 'mosdepth_coverage', 'qualimap_bamqc'], default = ['stats', 'collectwgsmetrics'] | +| `algorithm` | list | yes | List of tools to be run: ['fastqc', 'samtools_stats', 'collectwgsmetrics', 'mosdepth_coverage', 'mosdepth_quantize', 'qualimap_bamqc'], template default = ['samtools_stats', 'collectwgsmetrics'] | | `reference` | path | yes/no | Reference fasta is required only for `CollectWgsMetrics` | | `output_dir` | path | yes | Not required if `blcds_registered_dataset` = `true` | | `blcds_registered_dataset` | boolean | no | Default is `false`. Only `uclahs_cds` users should change this. When `true`, BLCDS folder structure is used | @@ -92,12 +95,6 @@ input: | stats_remove_duplicates | boolean | no | Ignore reads marked as duplicate. 
Default = `false` | | stats_additional_options | string | no | Any additional options recognized by `samtools stats` | -#### FastQC specific configuration -| Field | Type | Required | Description | -| ----- | ---- | ------------ | ------------------------ | -| fastqc_level | string | yes | 'readgroup', 'library' or 'sample' | -| fastqc_additional_options | string | no | Any additional options recognized by `FastQC` | - #### Picard specific configuration | Field | Type | Required | Description | | ----- | ---- | ------------ | ------------------------ | @@ -107,12 +104,11 @@ input: | cwm_use_fast_algorithm | boolean | no | If `true`, fast algorithm is used | | cwm_additional_options | string | no | Any additional options recognized by `CollectWgsMetrics` | -#### mosdepth windows specific configuration +#### FastQC specific configuration | Field | Type | Required | Description | | ----- | ---- | ------------ | ------------------------ | -| mosdepth_use_fast_algorithm | boolean | no | `fast` algorithm ignores read pair overlaps and CIGARs. It should not be used on libraries with small insert sizes. 
Default = `false` | -| mosdepth_window_size | integer | no | Size for `mosdepth windows` coverage calculations | -| mosdepth_additional_options | string | no | Any additional options recognized by `mosdepth` | +| fastqc_level | string | yes | 'readgroup', 'library' or 'sample' | +| fastqc_additional_options | string | no | Any additional options recognized by `FastQC` | #### Qualimap specific configuration | Field | Type | Required | Description | @@ -120,6 +116,25 @@ input: | bamqc_output_format | string | no | Choice of 'pdf' or 'html', default = 'pdf' | | bamqc_additional_options | string | no | Any additional options recognized by `bamqc` | +#### mosdepth coverage specific configuration +| Field | Type | Required | Description | +| ----- | ---- | ------------ | ------------------------ | +| mosdepth_use_fast_algorithm | boolean | no | `fast` algorithm ignores read pair overlaps and CIGARs. It should not be used on libraries with small insert sizes. Default = `false` | +| mosdepth_per_base_output | boolean | no | Output coverage for every base. Default = `true` | +| mosdepth_window_size | integer | no | Size for `mosdepth windows` coverage calculations | +| mosdepth_additional_options | string | no | Any additional options recognized by `mosdepth`; `--mapq 20` is recommended | + +#### mosdepth quantize specific configuration +| Field | Type | Required | Description | +| ----- | ---- | ------------ | ------------------------ | +| mosdepth_quantize_cutoffs | string | no | cutoffs for coverage regions. Default = `0:1:5:150` | +| mosdepth_quantize_use_fast_algorithm | boolean | no | `fast` algorithm ignores read pair overlaps and CIGARs. It should not be used on libraries with small insert sizes. Default = `false` | +| mosdepth_q0_label | string | no | lowest coverage regions label. Default = `Q0` | +| mosdepth_q1_label | string | no | next coverage regions label. Default = `Q1` | +| mosdepth_q2_label | string | no | next coverage regions label. 
Default = `Q2` | +| mosdepth_q3_label | string | no | highest coverage regions label. Default = `Q3` | +| mosdepth_quantize_additional_options | string | no | Any additional options recognized by `mosdepth`; `--mapq 20` is recommended | + #### Base resource allocation updaters To update the base resource (cpus or memory) allocations for processes, use the following structure. The default allocations can be found in the [node-specific config files](./config/) ```Nextflow @@ -172,14 +187,21 @@ base_resource_update { | Output | Description | | ------------ | ------------------------ | -| `{SAMtools-version}_{dataset_id}_{sample_id}_stats.txt` | SAMtools stats results | -| `{Picard-version}_{dataset_id}_{sample_id}_wgs-metrics.txt` | Picard CollectWgsMetrics results | -| `{Qualimap-version}_{dataset_id}_{sample_id}_stats` | Directory of Qualimap results, including, `genome_results.txt` and either `.pdf` or `.html and supporting directories`| -| `{FastQC-version}_{dataset_id}_{sample_id}_fastqc` | Directory of FastQC results | -| `{mosdepth-version}_{dataset_id}_{sample_id}-{window_size}.mosdepth.summary.txt` | Coverage by region with a final line for `total` | -| `{mosdepth-version}_{dataset_id}_{sample_id}-{window_size}.mosdepth.global.dist.txt` | a cumulative distribution indicating the proportion of total bases that were covered for at least a given coverage value | -| `{mosdepth-version}_{dataset_id}_{sample_id}-{window_size}.mosdepth.region.dist.txt` | a cumulative distribution indicating the proportion of the windows that were covered for at least a given coverage value | -| `{mosdepth-version}_{dataset_id}_{sample_id}-{window_size}.regions.bed.gz` | Bedfile giving coverage for each window | +| `{SAMtools-version}_{dataset_id}_{sample_id}_stats.txt` | `SAMtools stats` sample level results | +| `{SAMtools-version}_{dataset_id}_{sample_id}-{library_id}_stats.txt` | `SAMtools stats` library level results | +| 
`{SAMtools-version}_{dataset_id}_{sample_id}-{library_id}-{rg_id}_stats.txt` | `SAMtools stats` readgroup level results | +| `{Picard-version}_{dataset_id}_{sample_id}_wgs-metrics.txt` | `Picard CollectWgsMetrics` results | +| `{Qualimap-version}_{dataset_id}_{sample_id}_stats` | Directory of `Qualimap` results, including, `genome_results.txt` and either `.pdf` or `.html and supporting directories`| +| `{FastQC-version}_{dataset_id}_{sample_id}_fastqc` | Directory of sample level `FastQC` results | +| `{FastQC-version}_{dataset_id}_{sample_id}-{library_id}_fastqc` | Directory of library level `FastQC` results | +| `{FastQC-version}_{dataset_id}_{sample_id}-{library_id}-{rg_id}_fastqc` | Directory of readgroup level `FastQC` results | +| `{mosdepth-version}_{dataset_id}_{sample_id}-{window_size}.mosdepth.summary.txt` | `mosdepth` coverage results by region with a final line for `total` | +| `{mosdepth-version}_{dataset_id}_{sample_id}-{window_size}.mosdepth.global.dist.txt` | `mosdepth` coverage cumulative distribution indicating the proportion of total bases that were covered for at least a given coverage value | +| `{mosdepth-version}_{dataset_id}_{sample_id}-{window_size}.mosdepth.region.dist.txt` | `mosdepth` coverage cumulative distribution indicating the proportion of the windows that were covered for at least a given coverage value | +| `{mosdepth-version}_{dataset_id}_{sample_id}-{window_size}.regions.bed.gz` | `mosdepth` coverage bedfile giving coverage for each window | +| `{mosdepth-version}_{dataset_id}_{sample_id}-quantize-{q0}-{q1}-{q2}-{q3}.mosdepth.summary.txt` | `mosdepth` quantize coverage results by region with a final line for `total` | +| `{mosdepth-version}_{dataset_id}_{sample_id}-quantize-{q0}-{q1}-{q2}-{q3}.mosdepth.global.dist.txt` | `mosdepth` quantize cumulative distribution indicating the proportion of total bases that were covered for at least a given coverage value | +| 
`{mosdepth-version}_{dataset_id}_{sample_id}-quantize-{q0}-{q1}-{q2}-{q3}.quantized.bed.gz` | `mosdepth` quantize bed file --- diff --git a/config/F16.config b/config/F16.config index eddb896..fce7baf 100644 --- a/config/F16.config +++ b/config/F16.config @@ -43,6 +43,16 @@ process { } } } + withName: quantize_coverage_mosdepth { + cpus = 1 + memory = 8.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB + } + } + } withName: run_statsReadgroups_SAMtools { cpus = 1 memory = 1.GB diff --git a/config/F2.config b/config/F2.config index e03a716..e7475b0 100644 --- a/config/F2.config +++ b/config/F2.config @@ -43,6 +43,26 @@ process { } } } + withName: assess_coverage_mosdepth { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB + } + } + } + withName: quantize_coverage_mosdepth { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB + } + } + } withName: run_statsReadgroups_SAMtools { cpus = 1 memory = 1.5.GB diff --git a/config/F32.config b/config/F32.config index b28c4c0..aedae90 100644 --- a/config/F32.config +++ b/config/F32.config @@ -43,6 +43,16 @@ process { } } } + withName: quantize_coverage_mosdepth { + cpus = 1 + memory = 8.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB + } + } + } withName: run_statsReadgroups_SAMtools { cpus = 1 memory = 1.GB diff --git a/config/F4.config b/config/F4.config index d77aeae..0ac9809 100644 --- a/config/F4.config +++ b/config/F4.config @@ -43,6 +43,16 @@ process { } } } + withName: quantize_coverage_mosdepth { + cpus = 1 + memory = 8.GB + retry_strategy { + memory { + strategy = 'add' + operand = 2.GB + } + } + } withName: run_statsReadgroups_SAMtools { cpus = 1 memory = 1.GB diff --git a/config/F72.config b/config/F72.config index 043bb9a..1d7e40d 100644 --- a/config/F72.config +++ b/config/F72.config @@ -43,6 +43,16 @@ process { } } } + withName: quantize_coverage_mosdepth { + cpus = 1 + memory = 
8.GB + retry_strategy { + memory { + strategy = 'add' + operand = 8.GB + } + } + } withName: run_statsReadgroups_SAMtools { cpus = 1 memory = 1.GB diff --git a/config/F8.config b/config/F8.config index be21380..da1ec09 100644 --- a/config/F8.config +++ b/config/F8.config @@ -43,6 +43,16 @@ process { } } } + withName: quantize_coverage_mosdepth { + cpus = 1 + memory = 8.GB + retry_strategy { + memory { + strategy = 'add' + operand = 2.GB + } + } + } withName: run_statsReadgroups_SAMtools { cpus = 1 memory = 1.GB diff --git a/config/M64.config b/config/M64.config index 9e3e3a8..5a3ef51 100644 --- a/config/M64.config +++ b/config/M64.config @@ -43,6 +43,16 @@ process { } } } + withName: quantize_coverage_mosdepth { + cpus = 1 + memory = 8.GB + retry_strategy { + memory { + strategy = 'add' + operand = 8.GB + } + } + } withName: run_statsReadgroups_SAMtools { cpus = 1 memory = 1.GB diff --git a/config/schema.yaml b/config/schema.yaml index d91f084..655b69c 100644 --- a/config/schema.yaml +++ b/config/schema.yaml @@ -9,11 +9,12 @@ dataset_id: help: 'Dataset identifier' algorithm: type: 'List' - required: false + required: true help: 'List of QC algorithms' choices: - fastqc - mosdepth_coverage + - mosdepth_quantize - samtools_stats - collectwgsmetrics - qualimap_bamqc @@ -82,22 +83,58 @@ mosdepth_use_fast_algorithm: required: false default: true help: 'fast algorithm ignores read pair overlaps and CIGAR information' -mosdepth_window_size: - type: 'Integer' - required: false - default: 500 - help: 'Window size for mosdepth coverage calculation' mosdepth_per_base_output: type: 'Bool' required: false default: true help: 'Output per-base coverage' +mosdepth_window_size: + type: 'Integer' + required: false + default: 500 + help: 'Window size for mosdepth coverage calculation' mosdepth_additional_options: type: 'String' required: false allow_empty: true default: '' help: 'Additional arguments for mosdepth command' +mosdepth_quantize_cutoffs: + type: 'String' + required: false 
+ default: '0:1:5:150' + help: 'Quantize coverage values into these bins' +mosdepth_quantize_use_fast_algorithm: + type: 'Bool' + required: false + default: false + help: 'Use fast algorithm for quantizing coverage values' +mosdepth_q0_label: + type: 'String' + required: false + default: 'Q0' + help: 'Label for lowest coverage bin' +mosdepth_q1_label: + type: 'String' + required: false + default: 'Q1' + help: 'Label for second lowest coverage bin' +mosdepth_q2_label: + type: 'String' + required: false + default: 'Q2' + help: 'Label for third lowest coverage bin' +mosdepth_q3_label: + type: 'String' + required: false + default: 'Q3' + help: 'Label for highest coverage bin' +mosdepth_quantize_additional_options: + type: 'String' + required: false + allow_empty: true + default: '' + help: 'Additional arguments for mosdepth-quantize command' cwm_coverage_cap: type: 'Integer' required: false diff --git a/config/template.config b/config/template.config index 28dd927..593b1f9 100644 --- a/config/template.config +++ b/config/template.config @@ -8,7 +8,7 @@ includeConfig "${projectDir}/nextflow.config" // Inputs/parameters of the pipeline params { - algorithm = ['samtools_stats', 'collectwgsmetrics'] // 'fastqc', 'samtools_stats', 'collectwgsmetrics', 'mosdepth_coverage', 'qualimap_bamqc' + algorithm = ['samtools_stats', 'collectwgsmetrics'] // 'fastqc', 'samtools_stats', 'collectwgsmetrics', 'mosdepth_quantize', 'mosdepth_coverage', 'qualimap_bamqc' reference = '/hot/resource/reference-genome/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' output_dir = '/path/to/output/directory' blcds_registered_dataset = false // if you want the output to be registered @@ -22,9 +22,18 @@ params { // mosdepth window-base coverage options // fast algorithm ignores read pair overlaps and should not be used on libraries with small insert sizes mosdepth_use_fast_algorithm = false - mosdepth_window_size = 500 mosdepth_per_base_output = true - mosdepth_additional_options = '' + 
mosdepth_window_size = 500 + mosdepth_additional_options = '--mapq 20' + + // mosdepth quantized coverage (like GATK's Callable Regions) + mosdepth_quantize_cutoffs = '0:1:5:150' + mosdepth_quantize_use_fast_algorithm = false + mosdepth_q0_label = 'NO_COVERAGE' + mosdepth_q1_label = 'LOW_COVERAGE' + mosdepth_q2_label = 'CALLABLE' + mosdepth_q3_label = 'HIGH_COVERAGE' + mosdepth_quantize_additional_options = '--mapq 20' // Picard CollectWgsMetrics options cwm_coverage_cap = 1000 diff --git a/main.nf b/main.nf index b07419f..f4a9b2e 100755 --- a/main.nf +++ b/main.nf @@ -53,12 +53,16 @@ include { run_bamqc_Qualimap } from './module/bamqc_qualimap' addParams( workflow_log_output_dir: "${params.log_output_dir}/process-log/Qualimap-${params.qualimap_version}" ) - include { assess_coverage_mosdepth } from './module/windows_mosdepth' addParams( workflow_output_dir: "${params.output_dir_base}/mosdepth-${params.mosdepth_version}", workflow_log_output_dir: "${params.log_output_dir}/process-log/mosdepth-${params.mosdepth_version}" ) +include { quantize_coverage_mosdepth } from './module/quantize_mosdepth' addParams( + workflow_output_dir: "${params.output_dir_base}/mosdepth-${params.mosdepth_version}", + workflow_log_output_dir: "${params.log_output_dir}/process-log/mosdepth-${params.mosdepth_version}" + ) + include { indexFile } from './external/pipeline-Nextflow-module/modules/common/indexFile/main.nf' log.info """\ @@ -116,10 +120,19 @@ log.info """\ - mosdepth options: mosdepth_version: ${params.mosdepth_version} + - mosdepth coverage options: + mosdepth_use_fast_algorithm: ${params.mosdepth_use_fast_algorithm} mosdepth_window_size: ${params.mosdepth_window_size} mosdepth_per_base_output: ${params.mosdepth_per_base_output} - mosdepth_use_fast_algorithm: ${params.mosdepth_use_fast_algorithm} mosdepth_additional_options: ${params.mosdepth_additional_options} + - mosdepth quantize options: + mosdepth_quantize_cutoffs: ${params.mosdepth_quantize_cutoffs} + 
mosdepth_quantize_use_fast_algorithm: ${params.mosdepth_quantize_use_fast_algorithm} + mosdepth_q0_label: ${params.mosdepth_q0_label} + mosdepth_q1_label: ${params.mosdepth_q1_label} + mosdepth_q2_label: ${params.mosdepth_q2_label} + mosdepth_q3_label: ${params.mosdepth_q3_label} + mosdepth_quantize_additional_options: ${params.mosdepth_quantize_additional_options} - picard CollectWgsMetrics options: picard_version: ${params.picard_version} @@ -250,6 +263,11 @@ workflow { samples_to_process_ch ) } + if ('mosdepth_quantize' in params.algorithm) { + quantize_coverage_mosdepth( + samples_to_process_ch + ) + } if ('collectwgsmetrics' in params.algorithm) { run_CollectWgsMetrics_Picard( samples_to_process_ch, diff --git a/module/quantize_mosdepth.nf b/module/quantize_mosdepth.nf new file mode 100644 index 0000000..0e9132bd --- /dev/null +++ b/module/quantize_mosdepth.nf @@ -0,0 +1,48 @@ +/* +* mosdepth WGS quantized coverage assessment +* Runs `mosdepth --quantize` on each input BAM; outputs are prefixed mosdepth-<mosdepth_version> and bin labels are exported as MOSDEPTH_Q0..MOSDEPTH_Q3 +*/ + +include { generate_standard_filename } from '../external/pipeline-Nextflow-module/modules/common/generate_standardized_filename/main.nf' + +process quantize_coverage_mosdepth { + container params.docker_image_mosdepth + + publishDir path: "${params.workflow_output_dir}/output", + pattern: "${output_filename}*", + mode: "copy", + enabled: true + + ext log_dir_suffix: { "-${sm_id}" } + + input: + tuple path(bam), path(bam_index), val(orig_id), val(sm_id), val(rg_arg), val(rg_id), val(lib_id), val(sm_type), val(read_length) + + output: + path "${output_filename}*" + path ".command.*" + + script: + output_filename = generate_standard_filename("mosdepth-${params.mosdepth_version}", + params.dataset_id, + sm_id, + [:]) + def fast_algorithm_arg = params.mosdepth_quantize_use_fast_algorithm ?
"--fast-mode" : "" // quantize-specific switch, independent of the coverage workflow's mosdepth_use_fast_algorithm + def quantize_output_suffix = params.mosdepth_quantize_cutoffs.replace(':', '-') + + """ + set -euo pipefail + export MOSDEPTH_Q0="${params.mosdepth_q0_label}" + export MOSDEPTH_Q1="${params.mosdepth_q1_label}" + export MOSDEPTH_Q2="${params.mosdepth_q2_label}" + export MOSDEPTH_Q3="${params.mosdepth_q3_label}" + mosdepth \ + --no-per-base \ + ${fast_algorithm_arg} \ + --threads ${task.cpus} \ + --quantize ${params.mosdepth_quantize_cutoffs} \ + ${params.mosdepth_quantize_additional_options} \ + ${output_filename}-quantize-${quantize_output_suffix} \ + ${bam} + """ +} diff --git a/module/windows_mosdepth.nf b/module/windows_mosdepth.nf index 34f92cd..bda366e 100644 --- a/module/windows_mosdepth.nf +++ b/module/windows_mosdepth.nf @@ -23,14 +23,13 @@ process assess_coverage_mosdepth { path ".command.*" script: - output_filename = generate_standard_filename("mosdepth${params.picard_version}", + output_filename = generate_standard_filename("mosdepth-${params.mosdepth_version}", params.dataset_id, sm_id, [:]) fast_algorithm_arg = params.mosdepth_use_fast_algorithm ? "--fast-mode" : "" per_base_output_arg = params.mosdepth_per_base_output ? 
"" : "--no-per-base" - """ set -euo pipefail mosdepth \ diff --git a/nftest.yml b/nftest.yml index a840f02..a4f7848 100644 --- a/nftest.yml +++ b/nftest.yml @@ -53,37 +53,26 @@ cases: - actual: generate-SQC-BAM-*/NA24149/Qualimap-*/output/Qualimap-*_GIAB_HG003_stats/genome_results.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Qualimap-2.3_GIAB_HG003_stats/genome_results.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/mosdepth-*/output/mosdepth*_GIAB_HG003-window500.mosdepth.global.dist.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/mosdepth3.1.0_GIAB_HG003-window500.mosdepth.global.dist.txt + - actual: generate-SQC-BAM-*/NA24149/mosdepth-*/output/mosdepth-*_GIAB_HG003-window500.mosdepth.global.dist.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/mosdepth-3.1.0_GIAB_HG003-window500.mosdepth.global.dist.txt method: md5 - - actual: generate-SQC-BAM-*/NA24149/mosdepth-*/output/mosdepth*_GIAB_HG003-window500.mosdepth.region.dist.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/mosdepth3.1.0_GIAB_HG003-window500.mosdepth.region.dist.txt + - actual: generate-SQC-BAM-*/NA24149/mosdepth-*/output/mosdepth-*_GIAB_HG003-window500.mosdepth.region.dist.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/mosdepth-3.1.0_GIAB_HG003-window500.mosdepth.region.dist.txt method: md5 - - actual: generate-SQC-BAM-*/NA24149/mosdepth-*/output/mosdepth*_GIAB_HG003-window500.mosdepth.summary.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/mosdepth3.1.0_GIAB_HG003-window500.mosdepth.summary.txt + - actual: generate-SQC-BAM-*/NA24149/mosdepth-*/output/mosdepth-*_GIAB_HG003-window500.mosdepth.summary.txt + expect: 
/hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/mosdepth-3.1.0_GIAB_HG003-window500.mosdepth.summary.txt method: md5 - - name: hg003-windows - message: test mosdepth coverage - nf_script: main.nf - nf_config: test/config/windows.config - params_file: test/yaml/HG003_0.05x-selected-readgroups.yaml - skip: true - verbose: true - asserts: - - actual: generate-SQC-BAM-*/NA24149/mosdepth-*/output/mosdepth*_GIAB_HG003-window500.mosdepth.global.dist.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/mosdepth3.1.0_GIAB_HG003-window500.mosdepth.global.dist.txt + - actual: generate-SQC-BAM-*/NA24149/mosdepth-*/output/mosdepth-*_GIAB_HG003-quantize-0-1-5-150.mosdepth.global.dist.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/mosdepth-3.1.0_GIAB_HG003-quantize-0-1-5-150.mosdepth.global.dist.txt method: md5 - - actual: generate-SQC-BAM-*/NA24149/mosdepth-*/output/mosdepth*_GIAB_HG003-window500.mosdepth.region.dist.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/mosdepth3.1.0_GIAB_HG003-window500.mosdepth.region.dist.txt + - actual: generate-SQC-BAM-*/NA24149/mosdepth-*/output/mosdepth-*_GIAB_HG003-quantize-0-1-5-150.mosdepth.summary.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/mosdepth-3.1.0_GIAB_HG003-quantize-0-1-5-150.mosdepth.summary.txt method: md5 - - actual: generate-SQC-BAM-*/NA24149/mosdepth-*/output/mosdepth*_GIAB_HG003-window500.mosdepth.summary.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/mosdepth3.1.0_GIAB_HG003-window500.mosdepth.summary.txt - method: md5 - - name: a_mini-all-tools - message: test all tools with a_mini, single readgroup normal and tumor + - name: a_mini-multiple-samples-all-tools + message: test all tools with 1 normal and 2 tumor samples (single readgroups) nf_script: 
main.nf nf_config: test/config/all-tools.config - params_file: test/yaml/a_mini.yaml + params_file: test/yaml/a_mini-multiple-samples.yaml skip: false verbose: true asserts: @@ -93,30 +82,42 @@ cases: - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/by-sample/FastQC-*_TWGSAMIN_S2-v1.1.5_fastqc/fastqc_data.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5_fastqc/fastqc_data.txt script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/by-sample/FastQC-*_TWGSAMIN_S2-v1.1.5.n1_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5.n1_fastqc/fastqc_data.txt + script: test/assert_txt.sh - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/by-sample/SAMtools-*_TWGSAMIN_HG002.N_stats.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_TWGSAMIN_HG002.N_stats.txt script: test/assert_txt.sh - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/by-sample/SAMtools-*_TWGSAMIN_S2-v1.1.5_stats.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_TWGSAMIN_S2-v1.1.5_stats.txt script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/by-sample/SAMtools-*_TWGSAMIN_S2-v1.1.5.n1_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_TWGSAMIN_S2-v1.1.5.n1_stats.txt + script: test/assert_txt.sh - actual: generate-SQC-BAM-*/TWGSAMIN000001/Picard-*/output/Picard-*_TWGSAMIN_HG002.N_wgs-metrics.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Picard-3.1.0_TWGSAMIN_HG002.N_wgs-metrics.txt script: test/assert_txt.sh - actual: 
generate-SQC-BAM-*/TWGSAMIN000001/Picard-*/output/Picard-*_TWGSAMIN_S2-v1.1.5_wgs-metrics.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Picard-3.1.0_TWGSAMIN_S2-v1.1.5_wgs-metrics.txt script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/TWGSAMIN000001/Picard-*/output/Picard-*_TWGSAMIN_S2-v1.1.5.n1_wgs-metrics.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Picard-3.1.0_TWGSAMIN_S2-v1.1.5.n1_wgs-metrics.txt + script: test/assert_txt.sh - actual: generate-SQC-BAM-*/TWGSAMIN000001/Qualimap-*/output/Qualimap-*_TWGSAMIN_HG002.N_stats/genome_results.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Qualimap-2.3_TWGSAMIN_HG002.N_stats/genome_results.txt method: md5 - actual: generate-SQC-BAM-*/TWGSAMIN000001/Qualimap-*/output/Qualimap-*_TWGSAMIN_S2-v1.1.5_stats/genome_results.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Qualimap-2.3_TWGSAMIN_S2-v1.1.5_stats/genome_results.txt method: md5 - - name: a_mini-multiple-samples-all-tools - message: test all tools with 1 normal and 2 tumor samples (single readgroups) + - actual: generate-SQC-BAM-*/TWGSAMIN000001/Qualimap-*/output/Qualimap-*_TWGSAMIN_S2-v1.1.5.n1_stats/genome_results.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Qualimap-2.3_TWGSAMIN_S2-v1.1.5.n1_stats/genome_results.txt + method: md5 + - name: a_mini-all-tools + message: test all tools with a_mini, single readgroup normal and tumor nf_script: main.nf nf_config: test/config/all-tools.config - params_file: test/yaml/a_mini-multiple-samples.yaml - skip: false + params_file: test/yaml/a_mini.yaml + skip: true verbose: true asserts: - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/by-sample/FastQC-*_TWGSAMIN_HG002.N_fastqc/fastqc_data.txt @@ -125,35 +126,54 @@ cases: - actual: 
generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/by-sample/FastQC-*_TWGSAMIN_S2-v1.1.5_fastqc/fastqc_data.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/by-sample/FastQC-*_TWGSAMIN_S2-v1.1.5.n1_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5.n1_fastqc/fastqc_data.txt - script: test/assert_txt.sh - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/by-sample/SAMtools-*_TWGSAMIN_HG002.N_stats.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_TWGSAMIN_HG002.N_stats.txt script: test/assert_txt.sh - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/by-sample/SAMtools-*_TWGSAMIN_S2-v1.1.5_stats.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_TWGSAMIN_S2-v1.1.5_stats.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/by-sample/SAMtools-*_TWGSAMIN_S2-v1.1.5.n1_stats.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_TWGSAMIN_S2-v1.1.5.n1_stats.txt - script: test/assert_txt.sh - actual: generate-SQC-BAM-*/TWGSAMIN000001/Picard-*/output/Picard-*_TWGSAMIN_HG002.N_wgs-metrics.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Picard-3.1.0_TWGSAMIN_HG002.N_wgs-metrics.txt script: test/assert_txt.sh - actual: generate-SQC-BAM-*/TWGSAMIN000001/Picard-*/output/Picard-*_TWGSAMIN_S2-v1.1.5_wgs-metrics.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Picard-3.1.0_TWGSAMIN_S2-v1.1.5_wgs-metrics.txt script: test/assert_txt.sh - - actual: 
generate-SQC-BAM-*/TWGSAMIN000001/Picard-*/output/Picard-*_TWGSAMIN_S2-v1.1.5.n1_wgs-metrics.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Picard-3.1.0_TWGSAMIN_S2-v1.1.5.n1_wgs-metrics.txt - script: test/assert_txt.sh - actual: generate-SQC-BAM-*/TWGSAMIN000001/Qualimap-*/output/Qualimap-*_TWGSAMIN_HG002.N_stats/genome_results.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Qualimap-2.3_TWGSAMIN_HG002.N_stats/genome_results.txt method: md5 - actual: generate-SQC-BAM-*/TWGSAMIN000001/Qualimap-*/output/Qualimap-*_TWGSAMIN_S2-v1.1.5_stats/genome_results.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Qualimap-2.3_TWGSAMIN_S2-v1.1.5_stats/genome_results.txt method: md5 - - actual: generate-SQC-BAM-*/TWGSAMIN000001/Qualimap-*/output/Qualimap-*_TWGSAMIN_S2-v1.1.5.n1_stats/genome_results.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Qualimap-2.3_TWGSAMIN_S2-v1.1.5.n1_stats/genome_results.txt + - name: hg003-mosdepth-coverage + message: test mosdepth coverage + nf_script: main.nf + nf_config: test/config/mosdepth-coverage.config + params_file: test/yaml/HG003_0.05x-selected-readgroups.yaml + skip: true + verbose: true + asserts: + - actual: generate-SQC-BAM-*/NA24149/mosdepth-*/output/mosdepth-*_GIAB_HG003-window500.mosdepth.global.dist.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/mosdepth-3.1.0_GIAB_HG003-window500.mosdepth.global.dist.txt + method: md5 + - actual: generate-SQC-BAM-*/NA24149/mosdepth-*/output/mosdepth-*_GIAB_HG003-window500.mosdepth.region.dist.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/mosdepth-3.1.0_GIAB_HG003-window500.mosdepth.region.dist.txt + method: md5 + - actual: 
generate-SQC-BAM-*/NA24149/mosdepth-*/output/mosdepth-*_GIAB_HG003-window500.mosdepth.summary.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/mosdepth-3.1.0_GIAB_HG003-window500.mosdepth.summary.txt + method: md5 + - name: hg003-mosdepth-quantize + message: test mosdepth quantize + nf_script: main.nf + nf_config: test/config/mosdepth-quantize.config + params_file: test/yaml/HG003_0.05x-selected-readgroups.yaml + skip: true + verbose: true + asserts: + - actual: generate-SQC-BAM-*/NA24149/mosdepth-*/output/mosdepth-*_GIAB_HG003-quantize-0-1-5-150.mosdepth.global.dist.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/mosdepth-3.1.0_GIAB_HG003-quantize-0-1-5-150.mosdepth.global.dist.txt + method: md5 + - actual: generate-SQC-BAM-*/NA24149/mosdepth-*/output/mosdepth-*_GIAB_HG003-quantize-0-1-5-150.mosdepth.summary.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/mosdepth-3.1.0_GIAB_HG003-quantize-0-1-5-150.mosdepth.summary.txt method: md5 - name: hg003-fastqc message: test fastqc with downsampled HG003 subsetted to 6 readgroups and 3 libraries, readgroup level diff --git a/test/config/all-tools.config b/test/config/all-tools.config index e7d8859..236b6d7 100644 --- a/test/config/all-tools.config +++ b/test/config/all-tools.config @@ -8,8 +8,8 @@ includeConfig "${projectDir}/nextflow.config" // Inputs/parameters of the pipeline params { - algorithm = ['fastqc', 'samtools_stats', 'mosdepth_coverage', 'collectwgsmetrics', 'qualimap_bamqc'] // 'fastqc', 'samtools_stats', 'mosdepth_coverage', 'collectwgsmetrics', 'qualimap_bamqc' - reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' + algorithm = ['fastqc', 'samtools_stats', 'mosdepth_coverage', 'collectwgsmetrics', 'mosdepth_quantize', 'qualimap_bamqc'] + reference = '/hot/resource/reference-genome/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' 
blcds_registered_dataset = false // if you want the output to be registered save_intermediate_files = true @@ -20,9 +20,19 @@ params { // mosdepth window-base coverage options mosdepth_use_fast_algorithm = false + mosdepth_per_base_output = true mosdepth_window_size = 500 mosdepth_additional_options = '' + // mosdepth quantized coverage (like GATK's Callable Regions) + mosdepth_quantize_cutoffs = '0:1:5:150' + mosdepth_quantize_use_fast_algorithm = false + mosdepth_q0_label = 'NO_COVERAGE' + mosdepth_q1_label = 'LOW_COVERAGE' + mosdepth_q2_label = 'CALLABLE' + mosdepth_q3_label = 'HIGH_COVERAGE' + mosdepth_quantize_additional_options = '' + // Picard CollectWgsMetrics options cwm_coverage_cap = 1000 cwm_minimum_mapping_quality = 20 diff --git a/test/config/fastqc-readgroup.config b/test/config/fastqc-readgroup.config new file mode 100644 index 0000000..2459f02 --- /dev/null +++ b/test/config/fastqc-readgroup.config @@ -0,0 +1,25 @@ +// EXECUTION SETTINGS AND GLOBAL DEFAULTS + +// External config files import. DO NOT MODIFY THESE LINES! +includeConfig "${projectDir}/config/default.config" +includeConfig "${projectDir}/config/methods.config" +includeConfig "${projectDir}/nextflow.config" + + +// Inputs/parameters of the pipeline +params { + algorithm = ['fastqc'] + reference = '/hot/resource/reference-genome/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' // same reference path as the other test configs; per the README only CollectWgsMetrics requires it + blcds_registered_dataset = false // if you want the output to be registered + save_intermediate_files = true + + // FastQC options + fastqc_level = 'readgroup' // 'readgroup', 'library' or 'sample' + fastqc_additional_options = '' + + // Base resource allocation updater + // See README for adding parameters to update the base resource allocations +} + +// Setup the pipeline config. DO NOT REMOVE THIS LINE!
+methods.setup() diff --git a/test/config/fastqc-sample.config b/test/config/fastqc-sample.config new file mode 100644 index 0000000..d69799b --- /dev/null +++ b/test/config/fastqc-sample.config @@ -0,0 +1,25 @@ +// EXECUTION SETTINGS AND GLOBAL DEFAULTS + +// External config files import. DO NOT MODIFY THESE LINES! +includeConfig "${projectDir}/config/default.config" +includeConfig "${projectDir}/config/methods.config" +includeConfig "${projectDir}/nextflow.config" + + +// Inputs/parameters of the pipeline +params { + algorithm = ['fastqc'] + reference = '/hot/resource/reference-genome/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' // same reference path as the other test configs; per the README only CollectWgsMetrics requires it + blcds_registered_dataset = false // if you want the output to be registered + save_intermediate_files = true + + // FastQC options + fastqc_level = 'sample' // 'readgroup', 'library' or 'sample' + fastqc_additional_options = '' + + // Base resource allocation updater + // See README for adding parameters to update the base resource allocations +} + +// Setup the pipeline config. DO NOT REMOVE THIS LINE! +methods.setup() diff --git a/test/config/mosdepth-coverage.config b/test/config/mosdepth-coverage.config new file mode 100644 index 0000000..a2909f6 --- /dev/null +++ b/test/config/mosdepth-coverage.config @@ -0,0 +1,45 @@ +// EXECUTION SETTINGS AND GLOBAL DEFAULTS + +// External config files import. DO NOT MODIFY THESE LINES!
+includeConfig "${projectDir}/config/default.config" +includeConfig "${projectDir}/config/methods.config" +includeConfig "${projectDir}/nextflow.config" + + +// Inputs/parameters of the pipeline +params { + algorithm = ['mosdepth_coverage'] // available: 'fastqc', 'samtools_stats', 'mosdepth_coverage', 'mosdepth_quantize', 'collectwgsmetrics', 'qualimap_bamqc' + reference = '/hot/resource/reference-genome/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' + blcds_registered_dataset = false // if you want the output to be registered + save_intermediate_files = true + + // SAMtools stats options + stats_remove_duplicates = false + stats_additional_options = '' + + // mosdepth window-based coverage options + mosdepth_use_fast_algorithm = false + mosdepth_window_size = 500 + mosdepth_per_base_output = true + mosdepth_additional_options = '' + + // Picard CollectWgsMetrics options + cwm_coverage_cap = 1000 + cwm_minimum_mapping_quality = 20 + cwm_minimum_base_quality = 20 + cwm_additional_options = '' + cwm_use_fast_algorithm = false + + // Qualimap bamqc options + bamqc_output_format = 'pdf' + bamqc_additional_options = '' + + // FastQC options + fastqc_additional_options = '' + + // Base resource allocation updater + // See README for adding parameters to update the base resource allocations +} + +// Setup the pipeline config. DO NOT REMOVE THIS LINE! +methods.setup() diff --git a/test/config/mosdepth-quantize.config b/test/config/mosdepth-quantize.config new file mode 100644 index 0000000..e75b6c5 --- /dev/null +++ b/test/config/mosdepth-quantize.config @@ -0,0 +1,54 @@ +// EXECUTION SETTINGS AND GLOBAL DEFAULTS + +// External config files import. DO NOT MODIFY THESE LINES!
+includeConfig "${projectDir}/config/default.config" +includeConfig "${projectDir}/config/methods.config" +includeConfig "${projectDir}/nextflow.config" + + +// Inputs/parameters of the pipeline +params { + algorithm = ['mosdepth_quantize'] // available: 'fastqc', 'samtools_stats', 'mosdepth_coverage', 'mosdepth_quantize', 'collectwgsmetrics', 'qualimap_bamqc' + reference = '/hot/resource/reference-genome/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' + blcds_registered_dataset = false // if you want the output to be registered + save_intermediate_files = true + + // SAMtools stats options + stats_remove_duplicates = false + stats_additional_options = '' + + // mosdepth window-based coverage options + mosdepth_use_fast_algorithm = false + mosdepth_window_size = 500 + mosdepth_per_base_output = true + mosdepth_additional_options = '' + + // mosdepth quantized coverage (like GATK's Callable Regions) + mosdepth_quantize_cutoffs = '0:1:5:150' // NOTE(review): mosdepth appears to emit an open-ended top bin only when the cutoffs end with ':' - confirm the pipeline appends it, otherwise the q3 label may never be used + mosdepth_quantize_use_fast_algorithm = false + mosdepth_q0_label = 'NO_COVERAGE' + mosdepth_q1_label = 'LOW_COVERAGE' + mosdepth_q2_label = 'CALLABLE' + mosdepth_q3_label = 'HIGH_COVERAGE' + mosdepth_quantize_additional_options = '' + + // Picard CollectWgsMetrics options + cwm_coverage_cap = 1000 + cwm_minimum_mapping_quality = 20 + cwm_minimum_base_quality = 20 + cwm_additional_options = '' + cwm_use_fast_algorithm = false + + // Qualimap bamqc options + bamqc_output_format = 'pdf' + bamqc_additional_options = '' + + // FastQC options + fastqc_additional_options = '' + + // Base resource allocation updater + // See README for adding parameters to update the base resource allocations +} + +// Setup the pipeline config. DO NOT REMOVE THIS LINE!
+methods.setup() diff --git a/test/yaml/HG003_0.05x-selected-readgroups.yaml b/test/yaml/HG003_0.05x-selected-readgroups.yaml new file mode 100644 index 0000000..b1cd839 --- /dev/null +++ b/test/yaml/HG003_0.05x-selected-readgroups.yaml @@ -0,0 +1,8 @@ +--- +patient_id: 'NA24149' +dataset_id: 'GIAB' +input: + BAM: + normal: + - path: "/hot/user/sfitzgibbon/giab-downloading/HG003/BWA-MEM2-2.2.1_GATK-4.2.4.1_0000082_HG003_subsampled_0.05X-selected-readgroups.bam" + read_length: 148 # NOTE(review): the BAM path above is under a personal /hot/user directory - consider moving the test input to a shared test-data location