uclahs-cds · sorelfitzgibbon · Jan 12, 2024 · Nov 30, 2023 · Dec 1, 2023 · Dec 6, 2023
diff --git a/README.md b/README.md
@@ -16,9 +16,7 @@
        - [vcf2maf](#vcf2maf)
   - [Inputs](#inputs)
   - [Outputs](#outputs)
-  - [Testing and Validation](#testing-and-validation)
-    - [Test Data Set](#test-data-set)
-    - [Performance Validation](#performance-validation)
+  - [Performance Validation and Resource Requirements](#performance-validation)
   - [References](#references)
   - [Discussions](https://github.com/uclahs-cds/pipeline-call-sSNV/discussions)
   - [Contributors](https://github.com/uclahs-cds/template-NextflowPipeline/graphs/contributors)
@@ -189,7 +187,7 @@ input:
 | `docker_container_registry` | no | string | Registry containing tool Docker images, optional. Default: `ghcr.io/uclahs-cds` |
 | `base_resource_update` | optional | namespace | Namespace of parameters to update base resource allocations in the pipeline. Usage and structure are detailed in `template.config` and below. |
 
- *Providing `intersect_regions` is required and will limit the final output to just those regions.  All regions of the reference genome could be provided as a `bed` file with all contigs, however it is HIGHLY recommended to remove `decoy` contigs from the human reference genome. Including these thousands of small contigs will require the user to increase available memory for `Mutect2` and will cause a very long runtime for `Strelka2`. See [Discussion here](https://github.com/uclahs-cds/pipeline-call-sSNV/discussions/216). A GRCh38 `bed.gz` file can be found here: `/hot/ref/tool-specific-input/pipeline-call-sSNV-6.0.0/GRCh38-BI-20160721/Homo_sapiens_assembly38_no-decoy.bed.gz`. For other genome versions, you may be able to use [UCSC Liftover](https://genome.ucsc.edu/cgi-bin/hgLiftOver) to convert.
+ *Providing `intersect_regions` is required and will limit the final output to just those regions.  All regions of the reference genome could be provided as a `bed` file with all contigs, however it is HIGHLY recommended to remove `decoy` contigs from the human reference genome. Including these thousands of small contigs will require the user to increase available memory for `Mutect2` and will cause a very long runtime for `Strelka2`. See [Discussion here](https://github.com/uclahs-cds/pipeline-call-sSNV/discussions/216). For `uclahs-cds` users, a GRCh38 `bed.gz` file can be found here: `/hot/ref/tool-specific-input/pipeline-call-sSNV-6.0.0/GRCh38-BI-20160721/Homo_sapiens_assembly38_no-decoy.bed.gz`.
 
  ### Base resource allocation updaters
 To optionally update the base resource (cpus or memory) allocations for processes, use the following structure and add the necessary parts to the [input.config](config/template.config) file. The default allocations can be found in the [node-specific config files](./config/)
@@ -290,18 +288,16 @@ base_resource_update {
 | BCFtools-{version}_{sample_id}_Venn-diagram.tiff | .tiff | Venn Diagram with intersection counts for all variants (`1-or-more`)
 | BCFtools-{version}_{sample_id}_SNV-concat.vcf.gz | .vcf.gz | Single SNV VCF with all `2-or-more` variants and mixed annotation |
 | BCFtools-{version}_{sample_id}_SNV-concat.maf.gz | .maf.gz | Single SNV MAF with all `2-or-more` variants and mixed annotation |
-## Testing and Validation
 
-Testing was performed primarily in the Boutros Lab SLURM Development cluster using F72 node. Metrics below will be updated where relevant with additional testing and tuning outputs.
+### Performance Validation
+Testing was performed in the Boutros Lab SLURM Development cluster. Metrics below will be updated where relevant with additional testing and tuning outputs. Pipeline version used here is v4.0.0-rc.1
 
-### Test Data Set
+#### Whole Exomes
+General estimates, with wide variation, are that whole exome sequences require 16 cpus and 32 GB of memory to run all of the pipeline algorithms.  If MuSE is excluded 8 cpus and 16 GB of memory are sufficient.  Run time for a test pair of exome tumor/normal input BAMs of 4 GB/5 GB was in both cases 1 to 2 hours.
 
-| Data Set | Run Configuration | Output Dir | Control Sample | Tumor Sample |
-| ------ | ------ | ------- | ------ | ------- |
-| A-full-P2 |/hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/unreleased/maotian-update-README/analysis/all/A-full/nextflow.config | /hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/unreleased/maotian-update-README/analysis/all/A-full/output | HG002.N | P2 |
 
-### Performance Validation
-Testing was performed in the Boutros Lab SLURM Development cluster. Metrics below will be updated where relevant with additional testing and tuning outputs. Pipeline version used here is v4.0.0-rc.1
+#### Whole Genomes
+General estimates, with wide variation, are that whole genome sequences require 72 cpus and 144 GB of memory to run all of the pipeline algorithms. If MuSE is excluded 8 cpus and 16 GB of memory are sufficient, but run time could be very long. Run time for a test pair of WGS tumor/normal input BAMs of 400 GB/200 GB was 15 hours for 72 cpus/144 GB, and 52 hours for 8 cpus/16 GB excluding MuSE.
 
 #### Mutect2
 Duration: 3h 25m 24s

@@ -8,49 +8,67 @@ process {
     }
     withName: call_sSNV_SomaticSniper {
         cpus = 1
-        memory = 10.GB
+        memory = 1.GB
+        retry_strategy {
+            memory {
+                strategy = 'add'
+                operand = 3.GB
+            }
+        }
     }
     withName: convert_BAM2Pileup_SAMtools {
         cpus = 1
-        memory = 10.GB
+        memory = 1.GB
+        retry_strategy {
+            memory {
+                strategy = 'add'
+                operand = 3.GB
+            }
+        }
     }
     withName: create_IndelCandidate_SAMtools {
         cpus = 1
-        memory = 10.GB
+        memory = 1.GB
+        retry_strategy {
+            memory {
+                strategy = 'add'
+                operand = 3.GB
+            }
+        }
     }
     withName: call_sIndel_Manta {
-        cpus = 16
-        memory = 15.GB
+        cpus = 6
+        memory = 6.GB
         retry_strategy {
             memory {
                 strategy = 'add'
-                operand = 5.GB
+                operand = 3.GB
             }
         }
     }
     withName: call_sSNV_Strelka2 {
-        cpus = 16
-        memory = 15.GB
+        cpus = 6
+        memory = 2.GB
         retry_strategy {
             memory {
                 strategy = 'add'
-                operand = 5.GB
+                operand = 4.GB
             }
         }
     }
     withName: call_sSNV_Mutect2 {
-        cpus = 2
-        memory = 5.GB
+        cpus = 1
+        memory = 3.GB
         retry_strategy {
             memory {
                 strategy = 'add'
-                operand = 5.GB
+                operand = 2.GB
             }
         }
     }
     withName: run_LearnReadOrientationModel_GATK {
         cpus = 1
-        memory = 16.GB
+        memory = 8.GB
         retry_strategy {
             memory {
                 strategy = 'exponential'
@@ -59,22 +77,22 @@ process {
         }
     }
     withName: call_sSNV_MuSE {
-        cpus = 10
-        memory = 16.GB
+        cpus = 6
+        memory = 24.GB
         retry_strategy {
             memory {
                 strategy = 'add'
-                operand = 16.GB
+                operand = 8.GB
             }
         }
     }
     withName: run_sump_MuSE {
-        cpus = 4
-        memory = 16.GB
+        cpus = 8
+        memory = 24.GB
         retry_strategy {
             memory {
                 strategy = 'add'
-                operand = 16.GB
+                operand = 8.GB
             }
         }
     }

@@ -6,19 +6,19 @@
 process {
     withName: run_validate_PipeVal {
         cpus = 1
-        memory = 1.GB
+        memory = 2.GB
     }
     withName: call_sSNV_SomaticSniper {
         cpus = 1
-        memory = 3.GB
+        memory = 2.GB
     }
     withName: convert_BAM2Pileup_SAMtools {
         cpus = 1
-        memory = 3.GB
+        memory = 2.GB
     }
     withName: create_IndelCandidate_SAMtools {
         cpus = 1
-        memory = 3.GB
+        memory = 2.GB
     }
     withName: call_sIndel_Manta {
         cpus = 2
@@ -28,10 +28,8 @@ process {
     }
     withName: plot_VennDiagram_R {
         cpus = 2
-        memory = 1.GB
     }
     withName: concat_VCFs_BCFtools {
         cpus = 2
-        memory = 2.GB
     }
 }
@@ -62,7 +62,7 @@ process {
         retry_strategy {
             memory {
                 strategy = 'add'
-                operand = 5.GB
+                operand = 3.GB
             }
         }
     }
@@ -82,7 +82,7 @@ process {
         retry_strategy {
             memory {
                 strategy = 'add'
-                operand = 10.GB
+                operand = 16.GB
             }
         }
     }
@@ -92,7 +92,7 @@ process {
         retry_strategy {
             memory {
                 strategy = 'add'
-                operand = 10.GB
+                operand = 16.GB
             }
         }
     }

@@ -0,0 +1,119 @@
+// Other processes will only run one at a time, so
+// we don't need to control their resources.
+
+process {
+    withName: run_validate_PipeVal {
+        cpus = 1
+        memory = 1.GB
+    }
+    withName: call_sSNV_SomaticSniper {
+        cpus = 1
+        memory = 1.GB
+        retry_strategy {
+            memory {
+                strategy = 'add'
+                operand = 2.GB
+            }
+        }
+    }
+    withName: convert_BAM2Pileup_SAMtools {
+        cpus = 1
+        memory = 1.GB
+        retry_strategy {
+            memory {
+                strategy = 'add'
+                operand = 2.GB
+            }
+        }
+    }
+    withName: create_IndelCandidate_SAMtools {
+        cpus = 1
+        memory = 1.GB
+        retry_strategy {
+            memory {
+                strategy = 'add'
+                operand = 2.GB
+            }
+        }
+    }
+    withName: call_sIndel_Manta {
+        cpus = 4
+        memory = 3.GB
+        retry_strategy {
+            memory {
+                strategy = 'add'
+                operand = 3.GB
+            }
+        }
+    }
+    withName: call_sSNV_Strelka2 {
+        cpus = 4
+        memory = 2.GB
+        retry_strategy {
+            memory {
+                strategy = 'add'
+                operand = 2.GB
+            }
+        }
+    }
+    withName: call_sSNV_Mutect2 {
+        cpus = 1
+        memory = 3.GB
+        retry_strategy {
+            memory {
+                strategy = 'add'
+                operand = 2.GB
+            }
+        }
+    }
+    withName: run_LearnReadOrientationModel_GATK {
+        cpus = 1
+        memory = 8.GB
+        retry_strategy {
+            memory {
+                strategy = 'exponential'
+                operand = 2
+            }
+        }
+    }
+    withName: call_sSNV_MuSE {
+        cpus = 4
+        memory = 10.GB
+        retry_strategy {
+            memory {
+                strategy = 'add'
+                operand = 6.GB
+            }
+        }
+    }
+    withName: run_sump_MuSE {
+        cpus = 4
+        memory = 12.GB
+        retry_strategy {
+            memory {
+                strategy = 'add'
+                operand = 4.GB
+            }
+        }
+    }
+    withName: plot_VennDiagram_R {
+        cpus = 2
+        memory = 5.GB
+        retry_strategy {
+            memory {
+                strategy = 'add'
+                operand = 10.GB
+            }
+        }
+    }
+    withName: concat_VCFs_BCFtools {
+        cpus = 2
+        memory = 5.GB
+        retry_strategy {
+            memory {
+                strategy = 'add'
+                operand = 10.GB
+            }
+        }
+    }
+}
@@ -18,7 +18,6 @@ params {
 
     //algorithm version
     pipeval_version = "4.0.0-rc.2"
-    samtools_version = "1.16.1"
     GATK_version = "4.4.0.0"
     somaticsniper_version = "1.0.5.0"
     bam_readcount_version = "0.8.0"
@@ -29,7 +28,6 @@ params {
     BCFtools_version = "1.17"
     call_ssnv_r_version = "dev"
     vcf2maf_version = "v1.6.18"
-    docker_image_samtools = "${-> params.docker_container_registry}/samtools:${params.samtools_version}"
     docker_image_validate_params = "${-> params.docker_container_registry}/pipeval:${params.pipeval_version}"
     docker_image_GATK = "broadinstitute/gatk:${params.GATK_version}"
     docker_image_somaticsniper = "${-> params.docker_container_registry}/somaticsniper:${params.somaticsniper_version}"

@@ -54,7 +54,18 @@ if (params.max_cpus < 16 || params.max_memory < 30) {
         ------------------------------------
         ERROR: Insufficient resources: ${params.max_cpus} CPUs and ${params.max_memory} of memory.
         ------------------------------------
-        To run Mutect2 or MuSE. this pipeline requires at least 16 CPUs and 32 GB of memory.
+        To run Mutect2. this pipeline requires at least 8 CPUs and 16 GB of memory.
+        To run MuSE. this pipeline requires at least 16 CPUs and 32 GB of memory.
+        """
+        }
+    }
+else if (params.max_cpus < 16 || params.max_memory < 32) {
+    if (params.algorithm.contains('muse')) {
+        error """\
+        ------------------------------------
+        ERROR: Insufficient resources: ${params.max_cpus} CPUs and ${params.max_memory} of memory.
+        ------------------------------------
+        To run MuSE. this pipeline requires at least 16 CPUs and 32 GB of memory.
         """
         }
     }

@@ -5,4 +5,4 @@ Maintainers: ['[email protected]']
 Contributors: ['Mao Tian', 'Bugh Caden', 'Helena Winata', 'Yash Patel', 'Sorel Fitz-Gibbon']
 Languages: ['Docker', 'Nextflow']
 Dependencies: ['Docker', 'Nextflow']
-Tools: ['GATK 4.4.0.0', 'SomaticSniper v1.0.5.0', 'SAMtools v1.16.1', 'Strelka2 v2.9.10', 'Manta v1.6.0', 'MuSE v2.0.4', 'BCFtools v1.17', 'R v4.3.1', 'VennDiagram v1.7.3', 'vcf2maf v1.6.18']
+Tools: ['GATK 4.4.0.0', 'SomaticSniper v1.0.5.0', 'Strelka2 v2.9.10', 'Manta v1.6.0', 'MuSE v2.0.4', 'BCFtools v1.17', 'R v4.3.1', 'VennDiagram v1.7.3', 'vcf2maf v1.6.18']