Skip to content

Commit

Permalink
Merge pull request #87 from uclahs-cds/nwiltsie-update-reference-paths
Browse files Browse the repository at this point in the history
Update cluster reference paths
  • Loading branch information
nwiltsie authored Oct 25, 2024
2 parents 7aaae34 + b406b37 commit d40c8b0
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 44 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,11 +144,11 @@ For normal-only or tumour-only samples, exclude the fields for the other state.
| `scatter_count` | Yes | integer | Number of intervals to divide into for parallelization |
| `intervals` | Yes | path | Use all .list in inputs for WGS; Set to absolute path to targeted exome interval file (with .interval_list, .list, .intervals, or .bed suffix) |
| `gatk_ir_compression` | No | integer | Compression level for BAMs output by IndelRealigner. Default: 0. Range: 0-9 |
| `reference_fasta` | Yes | path | Absolute path to reference genome fasta file, e.g., `/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta` |
| `bundle_mills_and_1000g_gold_standard_indels_vcf_gz` | Yes | path | Absolute path to Mills & 1000G Gold Standard Indels file, e.g., `/hot/ref/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz` |
| `bundle_known_indels_vcf_gz` | Yes | path | Absolute path to known indels file, e.g., `/hot/ref/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz` |
| `bundle_v0_dbsnp138_vcf_gz` | Yes | path | Absolute path to dbsnp file, e.g., `/hot/ref/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz` |
| `bundle_contest_hapmap_3p3_vcf_gz` | Yes | path | Absolute path to HapMap 3.3 biallelic sites file, e.g., `/hot/ref/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz` |
| `reference_fasta` | Yes | path | Absolute path to reference genome fasta file, e.g., `/hot/resource/reference-genome/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta` |
| `bundle_mills_and_1000g_gold_standard_indels_vcf_gz` | Yes | path | Absolute path to Mills & 1000G Gold Standard Indels file, e.g., `/hot/resource/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz` |
| `bundle_known_indels_vcf_gz` | Yes | path | Absolute path to known indels file, e.g., `/hot/resource/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz` |
| `bundle_v0_dbsnp138_vcf_gz` | Yes | path | Absolute path to dbsnp file, e.g., `/hot/resource/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz` |
| `bundle_contest_hapmap_3p3_vcf_gz` | Yes | path | Absolute path to HapMap 3.3 biallelic sites file, e.g., `/hot/resource/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz` |
| `work_dir` | optional | path | Path of working directory for Nextflow. When included in the sample config file, Nextflow intermediate files and logs will be saved to this directory. With ucla_cds, the default is `/scratch` and should only be changed for testing/development. Changing this directory to `/hot` or `/tmp` can lead to high server latency and potential disk space limitations, respectively. |
| `base_resource_update` | optional | namespace | Namespace of parameters to update base resource allocations in the pipeline. Usage and structure are detailed in `template.config` and below. |

Expand Down
12 changes: 6 additions & 6 deletions config/template.config
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ params {

// Reference - the hg38 decoy version is used here
// GATK requires the reference fasta to be accompanied by a .fai index and .dict dictionary associated with the fasta for fast random access
// These can be found in the same folder as the reference here: /hot/ref/reference/GRCh38-BI-20160721
reference_fasta = "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta"
// These can be found in the same folder as the reference here: /hot/resource/reference-genome/GRCh38-BI-20160721
reference_fasta = "/hot/resource/reference-genome/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta"

// Whether to parallelize the pipeline by chromosome or by splitting into equal-sized intervals
// The scatter_count and extra args below only go into effect if parallelize_by_chromosome is disabled
Expand All @@ -51,10 +51,10 @@ params {
split_intervals_extra_args = ''

// GATK bundle - the hg38 decoy version is used here
bundle_mills_and_1000g_gold_standard_indels_vcf_gz = "/hot/ref/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
bundle_known_indels_vcf_gz = "/hot/ref/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz"
bundle_v0_dbsnp138_vcf_gz = "/hot/ref/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz"
bundle_contest_hapmap_3p3_vcf_gz = "/hot/ref/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz"
bundle_mills_and_1000g_gold_standard_indels_vcf_gz = "/hot/resource/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
bundle_known_indels_vcf_gz = "/hot/resource/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz"
bundle_v0_dbsnp138_vcf_gz = "/hot/resource/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz"
bundle_contest_hapmap_3p3_vcf_gz = "/hot/resource/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz"

// Base resource allocation updater
// See README for adding parameters to update the base resource allocations
Expand Down
26 changes: 13 additions & 13 deletions test/configtest-F16.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@
"params": {
"aligner": "BWA-MEM2-2.2.1",
"blcds_registered_dataset": false,
"bundle_contest_hapmap_3p3_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz",
"bundle_contest_hapmap_3p3_vcf_gz_tbi": "/hot/ref/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz.tbi",
"bundle_known_indels_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz",
"bundle_known_indels_vcf_gz_tbi": "/hot/ref/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi",
"bundle_mills_and_1000g_gold_standard_indels_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz",
"bundle_mills_and_1000g_gold_standard_indels_vcf_gz_tbi": "/hot/ref/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi",
"bundle_v0_dbsnp138_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz",
"bundle_v0_dbsnp138_vcf_gz_tbi": "/hot/ref/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz.tbi",
"bundle_contest_hapmap_3p3_vcf_gz": "/hot/resource/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz",
"bundle_contest_hapmap_3p3_vcf_gz_tbi": "/hot/resource/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz.tbi",
"bundle_known_indels_vcf_gz": "/hot/resource/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz",
"bundle_known_indels_vcf_gz_tbi": "/hot/resource/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi",
"bundle_mills_and_1000g_gold_standard_indels_vcf_gz": "/hot/resource/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz",
"bundle_mills_and_1000g_gold_standard_indels_vcf_gz_tbi": "/hot/resource/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi",
"bundle_v0_dbsnp138_vcf_gz": "/hot/resource/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz",
"bundle_v0_dbsnp138_vcf_gz_tbi": "/hot/resource/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz.tbi",
"cache_intermediate_pipeline_steps": false,
"dataset_id": "A-mini",
"docker_container_registry": "ghcr.io/uclahs-cds",
Expand All @@ -75,7 +75,7 @@
"input": {
"BAM": {
"tumor": [
"/hot/resource/SMC-HET/tumours/A-mini/bams/n1/output/S2.T-n1.bam"
"/hot/data/unregistered/SMC-HET/tumours/A-mini/bams/n1/output/S2.T-n1.bam"
]
},
"recalibration_table": [
Expand Down Expand Up @@ -228,13 +228,13 @@
"memory": "1 GB"
}
},
"reference_fasta": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta",
"reference_fasta_dict": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.dict",
"reference_fasta_fai": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta.fai",
"reference_fasta": "/hot/resource/reference-genome/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta",
"reference_fasta_dict": "/hot/resource/reference-genome/GRCh38-BI-20160721/Homo_sapiens_assembly38.dict",
"reference_fasta_fai": "/hot/resource/reference-genome/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta.fai",
"samples_to_process": [
{
"id": "4915723",
"path": "/hot/resource/SMC-HET/tumours/A-mini/bams/n1/output/S2.T-n1.bam",
"path": "/hot/data/unregistered/SMC-HET/tumours/A-mini/bams/n1/output/S2.T-n1.bam",
"sample_type": "tumor"
}
],
Expand Down
26 changes: 13 additions & 13 deletions test/configtest-F32.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@
"params": {
"aligner": "BWA-MEM2-2.2.1",
"blcds_registered_dataset": false,
"bundle_contest_hapmap_3p3_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz",
"bundle_contest_hapmap_3p3_vcf_gz_tbi": "/hot/ref/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz.tbi",
"bundle_known_indels_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz",
"bundle_known_indels_vcf_gz_tbi": "/hot/ref/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi",
"bundle_mills_and_1000g_gold_standard_indels_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz",
"bundle_mills_and_1000g_gold_standard_indels_vcf_gz_tbi": "/hot/ref/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi",
"bundle_v0_dbsnp138_vcf_gz": "/hot/ref/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz",
"bundle_v0_dbsnp138_vcf_gz_tbi": "/hot/ref/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz.tbi",
"bundle_contest_hapmap_3p3_vcf_gz": "/hot/resource/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz",
"bundle_contest_hapmap_3p3_vcf_gz_tbi": "/hot/resource/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz.tbi",
"bundle_known_indels_vcf_gz": "/hot/resource/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz",
"bundle_known_indels_vcf_gz_tbi": "/hot/resource/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi",
"bundle_mills_and_1000g_gold_standard_indels_vcf_gz": "/hot/resource/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz",
"bundle_mills_and_1000g_gold_standard_indels_vcf_gz_tbi": "/hot/resource/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi",
"bundle_v0_dbsnp138_vcf_gz": "/hot/resource/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz",
"bundle_v0_dbsnp138_vcf_gz_tbi": "/hot/resource/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz.tbi",
"cache_intermediate_pipeline_steps": false,
"dataset_id": "A-mini",
"docker_container_registry": "ghcr.io/uclahs-cds",
Expand All @@ -75,7 +75,7 @@
"input": {
"BAM": {
"tumor": [
"/hot/resource/SMC-HET/tumours/A-mini/bams/n1/output/S2.T-n1.bam"
"/hot/data/unregistered/SMC-HET/tumours/A-mini/bams/n1/output/S2.T-n1.bam"
]
},
"recalibration_table": [
Expand Down Expand Up @@ -228,13 +228,13 @@
"memory": "1 GB"
}
},
"reference_fasta": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta",
"reference_fasta_dict": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.dict",
"reference_fasta_fai": "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta.fai",
"reference_fasta": "/hot/resource/reference-genome/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta",
"reference_fasta_dict": "/hot/resource/reference-genome/GRCh38-BI-20160721/Homo_sapiens_assembly38.dict",
"reference_fasta_fai": "/hot/resource/reference-genome/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta.fai",
"samples_to_process": [
{
"id": "4915723",
"path": "/hot/resource/SMC-HET/tumours/A-mini/bams/n1/output/S2.T-n1.bam",
"path": "/hot/data/unregistered/SMC-HET/tumours/A-mini/bams/n1/output/S2.T-n1.bam",
"sample_type": "tumor"
}
],
Expand Down
12 changes: 6 additions & 6 deletions test/nftest.config
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ params {

// Reference - the hg38 decoy version is used here
// GATK requires the reference fasta to be accompanied by a .fai index and .dict dictionary associated with the fasta for fast random access
// These can be found in the same folder as the reference here: /hot/ref/reference/GRCh38-BI-20160721
reference_fasta = "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta"
// These can be found in the same folder as the reference here: /hot/resource/reference-genome/GRCh38-BI-20160721
reference_fasta = "/hot/resource/reference-genome/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta"

// Whether to parallelize the pipeline by chromosome or by splitting into equal-sized intervals
// The scatter_count and extra args below only go into effect if parallelize_by_chromosome is disabled
Expand All @@ -49,10 +49,10 @@ params {
split_intervals_extra_args = ''

// GATK bundle - the hg38 decoy version is used here
bundle_mills_and_1000g_gold_standard_indels_vcf_gz = "/hot/ref/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
bundle_known_indels_vcf_gz = "/hot/ref/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz"
bundle_v0_dbsnp138_vcf_gz = "/hot/ref/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz"
bundle_contest_hapmap_3p3_vcf_gz = "/hot/ref/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz"
bundle_mills_and_1000g_gold_standard_indels_vcf_gz = "/hot/resource/tool-specific-input/GATK/GRCh38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
bundle_known_indels_vcf_gz = "/hot/resource/tool-specific-input/GATK/GRCh38/Homo_sapiens_assembly38.known_indels.vcf.gz"
bundle_v0_dbsnp138_vcf_gz = "/hot/resource/tool-specific-input/GATK/GRCh38/resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf.gz"
bundle_contest_hapmap_3p3_vcf_gz = "/hot/resource/tool-specific-input/GATK/GRCh38/Biallelic/hapmap_3.3.hg38.BIALLELIC.PASS.2021-09-01.vcf.gz"
}

// Setup the pipeline config. DO NOT REMOVE THIS LINE!
Expand Down
2 changes: 1 addition & 1 deletion test/single.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ patient_id: TWGSAMIN000001
input:
BAM:
tumor:
- "/hot/resource/SMC-HET/tumours/A-mini/bams/n1/output/S2.T-n1.bam"
- "/hot/data/unregistered/SMC-HET/tumours/A-mini/bams/n1/output/S2.T-n1.bam"

0 comments on commit d40c8b0

Please sign in to comment.