From c28167f3e02887290201598ae41a5795f25e46de Mon Sep 17 00:00:00 2001 From: Saman Vaisipour Date: Thu, 28 Mar 2019 17:11:23 -0400 Subject: [PATCH] Second round of comments --- gcp_deepvariant_runner.py | 40 ++++++++++++++++------------------ gcp_deepvariant_runner_test.py | 4 ++-- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/gcp_deepvariant_runner.py b/gcp_deepvariant_runner.py index fe374e4..9425a09 100644 --- a/gcp_deepvariant_runner.py +++ b/gcp_deepvariant_runner.py @@ -419,9 +419,11 @@ def _get_bam_category(pipeline_args): return BamCategories.WGS_MEDIUM -def _set_args_based_on_bam_category(bam_category, pipeline_args): - """Sets pipeline args using the given bam_category and default flag values.""" +def _set_computational_flags_based_on_bam_size(pipeline_args): + """Automatically sets computational flags based on size of input BAM file.""" + bam_category = _get_bam_category(pipeline_args) default_flags = _DEFAULT_FLAGS[bam_category] + pipeline_args.shards = ( default_flags['make_examples_workers'] * default_flags['make_examples_cores_per_worker']) @@ -453,25 +455,6 @@ def _set_args_based_on_bam_category(bam_category, pipeline_args): pipeline_args.postprocess_variants_disk_gb = _POSTPROCESS_VARIANTS_DISK_GVCF -def _set_computational_flags_based_on_bam_size(pipeline_args): - """Automatically sets computational flags based on size of input BAM file.""" - # First validating all necessary flags are present. - if not (pipeline_args.docker_image and pipeline_args.docker_image_gpu): - raise ValueError('both --docker_image and --docker_image_gpu must be ' - 'provided with --set_optimized_flags_based_on_bam_size') - is_wes = pipeline_args.model.find(_WES_STANDARD) != -1 - is_wgs = pipeline_args.model.find(_WGS_STANDARD) != -1 - if is_wes == is_wgs: - raise ValueError('Unable to automatically set computational flags. Given ' - 'model is neither WGS nor WES: %s' % pipeline_args.model) - if not pipeline_args.bam.endswith(_BAM_FILE_SUFFIX): - raise ValueError( - 'Only able to automatically set computational flags for BAM files.') - - bam_category = _get_bam_category(pipeline_args) - _set_args_based_on_bam_category(bam_category, pipeline_args) - - def _run_make_examples(pipeline_args): """Runs the make_examples job.""" @@ -766,6 +749,21 @@ def get_extra_args(): def _validate_and_complete_args(pipeline_args): """Validates pipeline arguments and fills some missing args (if any).""" if pipeline_args.set_optimized_flags_based_on_bam_size: + # First validating all necessary flags are present. + if not (pipeline_args.docker_image and pipeline_args.docker_image_gpu): + raise ValueError('both --docker_image and --docker_image_gpu must be ' + 'provided with --set_optimized_flags_based_on_bam_size') + is_wes = pipeline_args.model.find(_WES_STANDARD) != -1 + is_wgs = pipeline_args.model.find(_WGS_STANDARD) != -1 + if not is_wes and not is_wgs: + raise ValueError('Unable to automatically set computational flags. Given ' + 'model is neither WGS nor WES: %s' % pipeline_args.model) + if is_wes and is_wgs: + raise ValueError('Unable to automatically set computational flags. Given ' + 'model matches both WGS and WES: %s' % pipeline_args.model) + if not pipeline_args.bam.endswith(_BAM_FILE_SUFFIX): + raise ValueError( + 'Only able to automatically set computational flags for BAM files.') _set_computational_flags_based_on_bam_size(pipeline_args) # Basic validation logic. More detailed validation is done by pipelines API. if pipeline_args.preemptible and pipeline_args.max_preemptible_tries <= 0: diff --git a/gcp_deepvariant_runner_test.py b/gcp_deepvariant_runner_test.py index d2c7096..0f704ce 100644 --- a/gcp_deepvariant_runner_test.py +++ b/gcp_deepvariant_runner_test.py @@ -560,7 +560,7 @@ def testRunSetOptimizedFlagsBasedOnBamSizeWesSmall_makeExamples( mock_obj_exist.return_value = True mock_can_write_to_bucket.return_value = True mock_object_size.return_value = 12 * 1024 * 1024 * 1024 - 1 - expected_workers = 8 + expected_workers = 16 expected_cores = 1 expected_shards = expected_workers * expected_cores expected_ram = expected_cores * gcp_deepvariant_runner._RAM_PER_CORE * 1024 @@ -852,7 +852,7 @@ def testRunSetOptimizedFlagsBasedOnBamSizeWesSmall_callVariants( mock_object_size.return_value = 12 * 1024 * 1024 * 1024 - 1 expected_workers = 1 expected_cores = 4 - expected_shards = 8 # equals to make_exmples: num_wokers * num_cores + expected_shards = 16 # equals to make_exmples: num_wokers * num_cores expected_ram = expected_cores * gcp_deepvariant_runner._RAM_PER_CORE * 1024 self._argv = [ '--project',