Skip to content

Commit

Permalink
update schema
Browse files Browse the repository at this point in the history
  • Loading branch information
nkwang24 committed Oct 24, 2024
1 parent f88ab7f commit 3d8167a
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 70 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ input:
| `liftover_direction` | string | Conversion direction: [GRCh37ToGRCh38, GRCh38ToGRCh37]. |
| `fasta_ref_37` | path | Path to the GRCh37 reference sequence (FASTA). |
| `fasta_ref_38` | path | Path to the GRCh38 reference sequence (FASTA). |
| `funcotator_data_source` | path | Path to [Funcotator data source](https://gatk.broadinstitute.org/hc/en-us/articles/360050815792-FuncotatorDataSourceDownloader) directory containing dbSNP, GENCODE and HGNC sources for SNV annotation. |
| `resource_bundle_path` | path | Path to unpacked [resource-bundle.zip](https://github.com/uclahs-cds/pipeline-StableLift/releases/download/v1.1.0/resource-bundle.zip). |
| `funcotator_data_source` | path | Path to [Funcotator data source](https://gatk.broadinstitute.org/hc/en-us/articles/360050815792-FuncotatorDataSourceDownloader) directory containing dbSNP, GENCODE and HGNC sources (required for SNV annotation). |

| Optional Parameter | Type | Default | Description |
| --------------------------- | ----------------------------------------------------------------------------------------- | ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
Expand Down
6 changes: 3 additions & 3 deletions config/schema-snv.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ repeat_bed:
type: 'Path'
mode: 'r'
required: true
help: 'RepeatMasker (v3.0.1) intervals from UCSC Table Browser for variant annotation in GRCh38 coordinates, included in resource-bundle.zip'
help: 'RepeatMasker (v3.0.1) intervals from UCSC Table Browser for variant annotation in target build coordinates, included in resource-bundle.zip'

funcotator_data_source:
type: 'Path'
required: true
mode: 'r'
help: 'Root data source folder for Funcotator from https://gatk.broadinstitute.org/hc/en-us/articles/360035889931-Funcotator-Information-and-Tutorial'
required: true
help: 'Path to Funcotator data source directory containing dbSNP, GENCODE and HGNC sources for SNV annotation'
147 changes: 85 additions & 62 deletions config/schema.yaml
Original file line number Diff line number Diff line change
@@ -1,53 +1,105 @@
---
sample_id:
type: 'String'
required: true
help: 'Sample id supplied from input yaml'

liftover_direction:
type: 'String'
# Required Parameters
output_dir:
type: 'Path'
mode: 'w'
required: true
help: 'Direction of LiftOver to perform'
choices:
- GRCh37ToGRCh38
- GRCh38ToGRCh37
help: 'Path to the directory where the output files are to be saved'

variant_caller:
type: 'String'
required: true
help: 'Tool used to call variants'
help: 'Variant calling algorithm used to generate input VCF'
choices:
- Mutect2
- HaplotypeCaller
- Mutect2
- Strelka2
- Muse2
- SomaticSniper
- Muse2
- Delly2-gSV
- Delly2-sSV

save_intermediate_files:
type: 'Bool'
rf_model:
type: 'Path'
mode: 'r'
required: true
default: false
help: 'Enable to store intermediate files'
help: 'Path to corresponding pre-trained random forest model'

output_dir:
type: 'Path'
mode: 'w'
liftover_direction:
type: 'String'
required: true
help: 'Absolute path to directory to store output'
help: 'Conversion direction'
choices:
- GRCh37ToGRCh38
- GRCh38ToGRCh37

fasta_ref_37:
type: 'Path'
mode: 'r'
required: true
help: 'GRCh37 FASTA reference'
help: 'Path to the GRCh37 reference sequence (FASTA)'

fasta_ref_38:
type: 'Path'
mode: 'r'
required: true
help: 'GRCh38 FASTA reference'
help: 'Path to the GRCh38 reference sequence (FASTA)'

resource_bundle_path:
type: 'Path'
mode: 'r'
required: true
help: 'Path to unpacked resource-bundle.zip'

# Optional Parameters
target_threshold:
type: 'RangedNumber'
required: false
min: 0
max: 1
default: ''
help: 'Target Stability Score threshold for variant filtering'

target_specificity:
type: 'RangedNumber'
required: false
min: 0
max: 1
default: ''
help: 'Target specificity based on whole genome validation set for variant filtering'

extract_features_cpus:
type: 'Integer'
required: false
default: 4
help: 'Number of cpus to use for parallel parsing of large VCFs (>1GB)'

save_intermediate_files:
type: 'Bool'
required: false
default: false
help: 'If set, save output files from intermediate pipeline processes'

blcds_registered_dataset:
type: 'Bool'
required: false
default: false
help: 'Set to true when using BLCDS folder structure; use false for now'

ucla_cds:
type: 'Bool'
required: false
default: false
help: 'If set, overwrite default memory and CPU values by UCLA cluster-specific configs'

# Internal parameters
src_fasta_id:
type: 'String'
required: true
help: 'Source reference genome build identifier'
choices:
- GRCh37
- GRCh38

src_fasta_ref:
type: 'Path'
Expand All @@ -67,6 +119,14 @@ src_fasta_dict:
required: true
help: 'Source reference sequence dictionary'

dest_fasta_id:
type: 'String'
required: true
help: 'Destination reference genome build identifier'
choices:
- GRCh37
- GRCh38

dest_fasta_ref:
type: 'Path'
mode: 'r'
Expand All @@ -89,41 +149,4 @@ chain_file:
type: 'Path'
mode: 'r'
required: true
help: 'Chain file corresponding to LiftOver direction, included in resource-bundle.zip'

rf_model:
type: 'Path'
mode: 'r'
required: true
help: 'Path to pre-trained random forest model (.Rds) corresponding to variant caller and LiftOver direction'

input:
type: 'Namespace'
required: true
help: 'Input sample'
elements:
vcf:
type: 'Path'
mode: 'r'
required: true
help: 'Input dataset supplied by input yaml'

target_threshold:
type: 'RangedNumber'
required: false
min: 0
max: 1
help: >-
Optional parameter specifying target Stability Score threshold for variant
filtering. Default behavior without `target_threshold` or
`target_specificity` specified uses threshold maximizing F1-score in whole
genome validation set.
target_specificity:
type: 'RangedNumber'
required: false
min: 0
max: 1
help: >-
Optional parameter specifying target specificity for variant filtering
based on whole genome validation set. Overrides `target_threshold`.
help: 'Chain file for LiftOver conversion'
9 changes: 5 additions & 4 deletions config/template.config
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,17 @@ params {
// Path to pre-trained random forest model
rf_model = ""

// Path to reference fasta files
// Path to reference fasta files with corresponding index file (.fai) and sequence dictionary (.dict)
fasta_ref_37 = "" // GRCh37-EBI-hs37d5/hs37d5.fa
fasta_ref_38 = "" // GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta

// Path to Funcotator data source directory containing dbSNP, GENCODE and HGNC sources for SNV annotation
funcotator_data_source = ""

// Path to unpacked resource-bundle.zip
resource_bundle_path = ""

// Path to Funcotator data source directory containing dbSNP, GENCODE and HGNC sources (required for SNV annotation)
// https://gatk.broadinstitute.org/hc/en-us/articles/360050815792-FuncotatorDataSourceDownloader
funcotator_data_source = ""

// Optional parameter specifying target Stability Score threshold for variant filtering
// Default behavior without `target_threshold` or `target_specificity` uses threshold maximizing F1-score in whole genome validation set
// Must be in the range [0.0, 1.0]
Expand Down

0 comments on commit 3d8167a

Please sign in to comment.