From 6de91b9150e3befb97952f7564cadcd0ea659e8e Mon Sep 17 00:00:00 2001
From: nkwang <nkwang@mednet.ucla.edu>
Date: Thu, 17 Oct 2024 18:54:59 -0700
Subject: [PATCH] add extract_features_cpus to README

---
 README.md | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 6bfbe25..47f3ff8 100644
--- a/README.md
+++ b/README.md
@@ -96,15 +96,16 @@ input:
 
 | Optional Parameter          | Type                                                                                      | Default                      | Description                                                                                                                                                                                                                                                                                                                                                                           |
 | --------------------------- | ----------------------------------------------------------------------------------------- | ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `target_threshold`          | numeric                                                                                   | `""`             | Target Stability Score threshold for variant filtering: [0, 1]. |
-| `target_specificity`        | numeric                                                                                   | `""`             | Target specificity based on whole genome validation set for variant filtering: [0, 1]. |
+| `target_threshold`          | numeric                                                                                   | `""`                         | Target Stability Score threshold for variant filtering: [0, 1]. |
+| `target_specificity`        | numeric                                                                                   | `""`                         | Target specificity based on whole genome validation set for variant filtering: [0, 1]. |
+| `extract_features_cpus`     | int                                                                                       | `4`                          | Number of CPUs to use for parallel parsing of large VCFs (>1GB). |
 | `work_dir`                  | path                                                                                      | `/scratch/$SLURM_JOB_ID`     | Path of working directory for Nextflow. When included in the sample config file, Nextflow intermediate files and logs will be saved to this directory. With `ucla_cds`, the default is `/scratch` and should only be changed for testing/development. Changing this directory to `/hot` or `/tmp` can lead to high server latency and potential disk space limitations, respectively. |
 | `save_intermediate_files`   | boolean                                                                                   | false                        | If set, save output files from intermediate pipeline processes.                                                                                                                                                                                                                                                                                                                       |
 | `min_cpus`                  | int                                                                                       | 1                            | Minimum number of CPUs that can be assigned to each process.                                                                                                                                                                                                                                                                                                                          |
 | `max_cpus`                  | int                                                                                       | `SysHelper.getAvailCpus()`   | Maximum number of CPUs that can be assigned to each process.                                                                                                                                                                                                                                                                                                                          |
 | `min_memory`                | [MemoryUnit](https://www.nextflow.io/docs/latest/script.html#implicit-classes-memoryunit) | `1.MB`                       | Minimum amount of memory that can be assigned to each process.                                                                                                                                                                                                                                                                                                                        |
 | `max_memory`                | [MemoryUnit](https://www.nextflow.io/docs/latest/script.html#implicit-classes-memoryunit) | `SysHelper.getAvailMemory()` | Maximum amount of memory that can be assigned to each process.                                                                                                                                                                                                                                                                                                                        |
-| `dataset_id`                | string                                                                                    | `""`                         | Dataset ID to be used as output filename prefix.                                                                                                                                                                                                                                                                                                                                         |
+| `dataset_id`                | string                                                                                    | `""`                         | Dataset ID to be used as output filename prefix.                                                                                                                                                                                                                                                                                                                                      |
 | `blcds_registered_dataset`  | boolean                                                                                   | false                        | Set to true when using BLCDS folder structure; use false for now.                                                                                                                                                                                                                                                                                                                     |
 | `ucla_cds`                  | boolean                                                                                   | true                         | If set, overwrite default memory and CPU values by UCLA cluster-specific configs.                                                                                                                                                                                                                                                                                                     |
 
@@ -114,10 +115,10 @@ input:
 
 | Output | Description |
 | ------------ | ------------------------ |
-| `*_StableLift.vcf.gz` | Output VCF in target build coordinates with variant annotations and predicted Stability Scores. |
-| `*_StableLift.vcf.gz.tbi` | Output VCF tabix index. |
-| `*_StableLift-filtered.vcf.gz` | Filtered output VCF with predicted "Unstable" variants removed. |
-| `*_StableLift-filtered.vcf.gz.tbi` | Filtered output VCF tabix index. |
+| `*_StableLift-${target_build}.vcf.gz` | Output VCF in target build coordinates with variant annotations and predicted Stability Scores. |
+| `*_StableLift-${target_build}.vcf.gz.tbi` | Output VCF tabix index. |
+| `*_StableLift-${target_build}_filtered.vcf.gz` | Filtered output VCF with predicted "Unstable" variants removed. |
+| `*_StableLift-${target_build}_filtered.vcf.gz.tbi` | Filtered output VCF tabix index. |
 
 ---
 
@@ -125,7 +126,7 @@ input:
 
 ### Test Dataset
 
-10 whole genomes from [The Cancer Genome Atlas (TCGA-SARC)](https://portal.gdc.cancer.gov/projects/TCGA-SARC) were used to test pipeline outputs and validate model performance. All data was processed using [standardized Nextflow pipelines](https://github.com/uclahs-cds/metapipeline-DNA). Somatic VCFs from GRCh37 and GRCh38 alignments are available for the four supported sSNV callers as [release attachments](https://github.com/uclahs-cds/pipeline-StableLift/releases).
+10 whole genomes from [The Cancer Genome Atlas (TCGA-SARC)](https://portal.gdc.cancer.gov/projects/TCGA-SARC) were used to test pipeline outputs and validate model performance. All data was processed using [standardized Nextflow pipelines](https://github.com/uclahs-cds/metapipeline-DNA). Somatic VCFs from GRCh37 and GRCh38 alignments are available for the four supported sSNV callers and the DELLY2 sSV caller as [release attachments](https://github.com/uclahs-cds/pipeline-StableLift/releases).
 
 | Donor ID       | Normal Sample ID          | Tumour Sample ID          |
 |----------------|---------------------------|---------------------------|