Second round of comments

googlegenomics · Mar 28, 2019 · c28167f · c28167f
1 parent 70dae8c
commit c28167f
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 23 deletions.
diff --git a/gcp_deepvariant_runner.py b/gcp_deepvariant_runner.py
@@ -419,9 +419,11 @@ def _get_bam_category(pipeline_args):
       return BamCategories.WGS_MEDIUM
 
 
-def _set_args_based_on_bam_category(bam_category, pipeline_args):
-  """Sets pipeline args using the given bam_category and default flag values."""
+def _set_computational_flags_based_on_bam_size(pipeline_args):
+  """Automatically sets computational flags based on size of input BAM file."""
+  bam_category = _get_bam_category(pipeline_args)
   default_flags = _DEFAULT_FLAGS[bam_category]
+
   pipeline_args.shards = (
       default_flags['make_examples_workers'] *
       default_flags['make_examples_cores_per_worker'])
@@ -453,25 +455,6 @@ def _set_args_based_on_bam_category(bam_category, pipeline_args):
     pipeline_args.postprocess_variants_disk_gb = _POSTPROCESS_VARIANTS_DISK_GVCF
 
 
-def _set_computational_flags_based_on_bam_size(pipeline_args):
-  """Automatically sets computational flags based on size of input BAM file."""
-  # First validating all necessary flags are present.
-  if not (pipeline_args.docker_image and pipeline_args.docker_image_gpu):
-    raise ValueError('both --docker_image and --docker_image_gpu must be '
-                     'provided with --set_optimized_flags_based_on_bam_size')
-  is_wes = pipeline_args.model.find(_WES_STANDARD) != -1
-  is_wgs = pipeline_args.model.find(_WGS_STANDARD) != -1
-  if is_wes == is_wgs:
-    raise ValueError('Unable to automatically set computational flags. Given '
-                     'model is neither WGS nor WES: %s' % pipeline_args.model)
-  if not pipeline_args.bam.endswith(_BAM_FILE_SUFFIX):
-    raise ValueError(
-        'Only able to automatically set computational flags for BAM files.')
-
-  bam_category = _get_bam_category(pipeline_args)
-  _set_args_based_on_bam_category(bam_category, pipeline_args)
-
-
 def _run_make_examples(pipeline_args):
   """Runs the make_examples job."""
 
@@ -766,6 +749,21 @@ def get_extra_args():
 def _validate_and_complete_args(pipeline_args):
   """Validates pipeline arguments and fills some missing args (if any)."""
   if pipeline_args.set_optimized_flags_based_on_bam_size:
+    # First validating all necessary flags are present.
+    if not (pipeline_args.docker_image and pipeline_args.docker_image_gpu):
+      raise ValueError('both --docker_image and --docker_image_gpu must be '
+                       'provided with --set_optimized_flags_based_on_bam_size')
+    is_wes = pipeline_args.model.find(_WES_STANDARD) != -1
+    is_wgs = pipeline_args.model.find(_WGS_STANDARD) != -1
+    if not is_wes and not is_wgs:
+      raise ValueError('Unable to automatically set computational flags. Given '
+                       'model is neither WGS nor WES: %s' % pipeline_args.model)
+    if is_wes and is_wgs:
+      raise ValueError('Unable to automatically set computational flags. Given '
+                       'model matches both WGS and WES: %s' % pipeline_args.model)
+    if not pipeline_args.bam.endswith(_BAM_FILE_SUFFIX):
+      raise ValueError(
+          'Only able to automatically set computational flags for BAM files.')
     _set_computational_flags_based_on_bam_size(pipeline_args)
   # Basic validation logic. More detailed validation is done by pipelines API.
   if pipeline_args.preemptible and pipeline_args.max_preemptible_tries <= 0:

diff --git a/gcp_deepvariant_runner_test.py b/gcp_deepvariant_runner_test.py
@@ -560,7 +560,7 @@ def testRunSetOptimizedFlagsBasedOnBamSizeWesSmall_makeExamples(
     mock_obj_exist.return_value = True
     mock_can_write_to_bucket.return_value = True
     mock_object_size.return_value = 12 * 1024 * 1024 * 1024 - 1
-    expected_workers = 8
+    expected_workers = 16
     expected_cores = 1
     expected_shards = expected_workers * expected_cores
     expected_ram = expected_cores * gcp_deepvariant_runner._RAM_PER_CORE * 1024
@@ -852,7 +852,7 @@ def testRunSetOptimizedFlagsBasedOnBamSizeWesSmall_callVariants(
     mock_object_size.return_value = 12 * 1024 * 1024 * 1024 - 1
     expected_workers = 1
     expected_cores = 4
-    expected_shards = 8  # equals to make_exmples: num_wokers * num_cores
+    expected_shards = 16  # equals to make_exmples: num_wokers * num_cores
     expected_ram = expected_cores * gcp_deepvariant_runner._RAM_PER_CORE * 1024
     self._argv = [
         '--project',