Skip to content

Commit

Permalink
Format Python files.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 518020013
  • Loading branch information
pichuan authored and copybara-github committed Mar 20, 2023
1 parent 3579c75 commit f068215
Show file tree
Hide file tree
Showing 4 changed files with 523 additions and 294 deletions.
4 changes: 3 additions & 1 deletion deeptrio/dt_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@

# The dimensions of a pileup image tensor as height x width x channels.
PILEUP_DEFAULT_DIMS = [
PILEUP_DEFAULT_HEIGHT, PILEUP_DEFAULT_WIDTH, PILEUP_NUM_CHANNELS
PILEUP_DEFAULT_HEIGHT,
PILEUP_DEFAULT_WIDTH,
PILEUP_NUM_CHANNELS,
]

# Number of classes represented in the data set. The three classes are
Expand Down
271 changes: 183 additions & 88 deletions deeptrio/make_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,77 +54,129 @@

# Flags related to samples in DeepTrio:
SAMPLE_NAME_TO_TRAIN_ = flags.DEFINE_string(
'sample_name_to_train', None,
'Optional - if not set, default to the value in '
'--sample_name, i.e. the child. The default is set to be backward '
'compatible. If set, it has to match one of --sample_name, '
'--sample_name_parent1, or --sample_name_parent2. '
'Only used for training. When run in calling mode, this is unused because '
'examples are generated for all 3 samples together.')
'sample_name_to_train',
None,
(
'Optional - if not set, default to the value in --sample_name, i.e. the'
' child. The default is set to be backward compatible. If set, it has'
' to match one of --sample_name, --sample_name_parent1, or'
' --sample_name_parent2. Only used for training. When run in calling'
' mode, this is unused because examples are generated for all 3 samples'
' together.'
),
)
READS_ = flags.DEFINE_string(
'reads', None,
'Required. Aligned, sorted, indexed BAM file containing reads from the '
'child of the trio. '
'Should be aligned to a reference genome compatible with --ref. '
'Can provide multiple BAMs (comma-separated).')
'reads',
None,
(
'Required. Aligned, sorted, indexed BAM file containing reads from the '
'child of the trio. '
'Should be aligned to a reference genome compatible with --ref. '
'Can provide multiple BAMs (comma-separated).'
),
)
READS_PARENT1_ = flags.DEFINE_string(
'reads_parent1', None,
'Required. Aligned, sorted, indexed BAM file containing reads from parent '
'1 of the trio. Should be aligned to a reference genome compatible with '
'--ref. Can provide multiple BAMs (comma-separated).')
'reads_parent1',
None,
(
'Required. Aligned, sorted, indexed BAM file containing reads from'
' parent 1 of the trio. Should be aligned to a reference genome'
' compatible with --ref. Can provide multiple BAMs (comma-separated).'
),
)
READS_PARENT2_ = flags.DEFINE_string(
'reads_parent2', None,
'Aligned, sorted, indexed BAM file containing reads from parent 2 of the '
'trio. Should be aligned to a reference genome compatible with --ref. '
'Can provide multiple BAMs (comma-separated).')
'reads_parent2',
None,
(
'Aligned, sorted, indexed BAM file containing reads from parent 2 of'
' the trio. Should be aligned to a reference genome compatible with'
' --ref. Can provide multiple BAMs (comma-separated).'
),
)
DOWNSAMPLE_FRACTION_CHILD_ = flags.DEFINE_float(
'downsample_fraction_child', NO_DOWNSAMPLING,
'If not ' + str(NO_DOWNSAMPLING) + ' must be a value between 0.0 and 1.0. '
'downsample_fraction_child',
NO_DOWNSAMPLING,
'If not '
+ str(NO_DOWNSAMPLING)
+ ' must be a value between 0.0 and 1.0. '
'Reads will be kept (randomly) with a probability of downsample_fraction '
'from the input child BAM. This argument makes it easy to create examples '
'as though the input BAM had less coverage.')
'as though the input BAM had less coverage.',
)
DOWNSAMPLE_FRACTION_PARENTS_ = flags.DEFINE_float(
'downsample_fraction_parents', NO_DOWNSAMPLING,
'If not ' + str(NO_DOWNSAMPLING) + ' must be a value between 0.0 and 1.0. '
'downsample_fraction_parents',
NO_DOWNSAMPLING,
'If not '
+ str(NO_DOWNSAMPLING)
+ ' must be a value between 0.0 and 1.0. '
'Reads will be kept (randomly) with a probability of downsample_fraction '
'from the input parent BAMs. This argument makes it easy to create examples'
' as though the input BAMs had less coverage.')
' as though the input BAMs had less coverage.',
)
SAMPLE_NAME_ = flags.DEFINE_string(
'sample_name', '',
'Child sample name to use for our sample_name in the output '
'Variant/DeepVariantCall protos. If not specified, will be inferred from '
'the header information from --reads.')
'sample_name',
'',
(
'Child sample name to use for our sample_name in the output'
' Variant/DeepVariantCall protos. If not specified, will be inferred'
' from the header information from --reads.'
),
)
SAMPLE_NAME_PARENT1_ = flags.DEFINE_string(
'sample_name_parent1', '',
'Parent1 Sample name to use for our sample_name in the output '
'Variant/DeepVariantCall protos. If not specified, will be inferred from '
'the header information from --reads_parent1.')
'sample_name_parent1',
'',
(
'Parent1 Sample name to use for our sample_name in the output'
' Variant/DeepVariantCall protos. If not specified, will be inferred'
' from the header information from --reads_parent1.'
),
)
SAMPLE_NAME_PARENT2_ = flags.DEFINE_string(
'sample_name_parent2', '',
'Parent2 Sample name to use for our sample_name in the output '
'Variant/DeepVariantCall protos. If not specified, will be inferred from '
'the header information from --reads_parent2.')
'sample_name_parent2',
'',
(
'Parent2 Sample name to use for our sample_name in the output'
' Variant/DeepVariantCall protos. If not specified, will be inferred'
' from the header information from --reads_parent2.'
),
)
PILEUP_IMAGE_HEIGHT_PARENT_ = flags.DEFINE_integer(
'pileup_image_height_parent', 0,
'Height for the parent pileup image. If 0, uses the default height')
'pileup_image_height_parent',
0,
'Height for the parent pileup image. If 0, uses the default height',
)
PILEUP_IMAGE_HEIGHT_CHILD_ = flags.DEFINE_integer(
'pileup_image_height_child', 0,
'Height for the child pileup image. If 0, uses the default height')
'pileup_image_height_child',
0,
'Height for the child pileup image. If 0, uses the default height',
)
PROPOSED_VARIANTS_CHILD_ = flags.DEFINE_string(
'proposed_variants_child', None,
'(Only used when --variant_caller=vcf_candidate_importer.) '
'Tabix-indexed VCF file containing the proposed positions and alts for '
'`vcf_candidate_importer` for the child. The GTs will be ignored.')
'proposed_variants_child',
None,
(
'(Only used when --variant_caller=vcf_candidate_importer.) '
'Tabix-indexed VCF file containing the proposed positions and alts for '
'`vcf_candidate_importer` for the child. The GTs will be ignored.'
),
)
PROPOSED_VARIANTS_PARENT1_ = flags.DEFINE_string(
'proposed_variants_parent1', None,
'(Only used when --variant_caller=vcf_candidate_importer.) '
'Tabix-indexed VCF file containing the proposed positions and alts for '
'`vcf_candidate_importer` for the parent 1. The GTs will be ignored.')
'proposed_variants_parent1',
None,
(
'(Only used when --variant_caller=vcf_candidate_importer.) '
'Tabix-indexed VCF file containing the proposed positions and alts for '
'`vcf_candidate_importer` for the parent 1. The GTs will be ignored.'
),
)
PROPOSED_VARIANTS_PARENT2_ = flags.DEFINE_string(
'proposed_variants_parent2', None,
'(Only used when --variant_caller=vcf_candidate_importer.) '
'Tabix-indexed VCF file containing the proposed positions and alts for '
'`vcf_candidate_importer` for the parent 2. The GTs will be ignored.')
'proposed_variants_parent2',
None,
(
'(Only used when --variant_caller=vcf_candidate_importer.) '
'Tabix-indexed VCF file containing the proposed positions and alts for '
'`vcf_candidate_importer` for the parent 2. The GTs will be ignored.'
),
)
# We use this flag to determine intervals for both the child and parent
# models. In the future, we could consider extending it to all 3 samples.
CANDIDATE_POSITIONS_ = flags.DEFINE_string(
Expand All @@ -145,38 +197,47 @@ def trio_samples_from_flags(add_flags=True, flags_obj=None):
"""Collects sample-related options into a list of samples."""
# Sample-specific options.
child_sample_name = make_examples_core.assign_sample_name(
sample_name_flag=SAMPLE_NAME_.value, reads_filenames=READS_.value)
sample_name_flag=SAMPLE_NAME_.value, reads_filenames=READS_.value
)

parent1_sample_name = make_examples_core.assign_sample_name(
sample_name_flag=SAMPLE_NAME_PARENT1_.value,
reads_filenames=READS_PARENT1_.value)
reads_filenames=READS_PARENT1_.value,
)

parent2_sample_name = make_examples_core.assign_sample_name(
sample_name_flag=SAMPLE_NAME_PARENT2_.value,
reads_filenames=READS_PARENT2_.value)
reads_filenames=READS_PARENT2_.value,
)

parent1_options = deepvariant_pb2.SampleOptions(
role='parent1',
name=parent1_sample_name,
variant_caller_options=make_examples_core.make_vc_options(
sample_name=parent1_sample_name, flags_obj=flags_obj),
sample_name=parent1_sample_name, flags_obj=flags_obj
),
order=[0, 1, 2],
pileup_height=dt_constants.PILEUP_DEFAULT_HEIGHT_PARENT)
pileup_height=dt_constants.PILEUP_DEFAULT_HEIGHT_PARENT,
)
child_options = deepvariant_pb2.SampleOptions(
role='child',
name=child_sample_name,
variant_caller_options=make_examples_core.make_vc_options(
sample_name=child_sample_name, flags_obj=flags_obj),
sample_name=child_sample_name, flags_obj=flags_obj
),
order=[0, 1, 2],
pileup_height=dt_constants.PILEUP_DEFAULT_HEIGHT_CHILD)
pileup_height=dt_constants.PILEUP_DEFAULT_HEIGHT_CHILD,
)
parent2_options = deepvariant_pb2.SampleOptions(
role='parent2',
name=parent2_sample_name,
variant_caller_options=make_examples_core.make_vc_options(
sample_name=parent2_sample_name, flags_obj=flags_obj),
sample_name=parent2_sample_name, flags_obj=flags_obj
),
# Swap the two parents when calling on parent2.
order=[2, 1, 0],
pileup_height=dt_constants.PILEUP_DEFAULT_HEIGHT_PARENT)
pileup_height=dt_constants.PILEUP_DEFAULT_HEIGHT_PARENT,
)

# If --sample_name_to_train is not set, train on the child.
# This is for backward compatibility.
Expand All @@ -196,9 +257,13 @@ def trio_samples_from_flags(add_flags=True, flags_obj=None):
if PROPOSED_VARIANTS_CHILD_.value:
child_options.proposed_variants_filename = PROPOSED_VARIANTS_CHILD_.value
if PROPOSED_VARIANTS_PARENT1_.value:
parent1_options.proposed_variants_filename = PROPOSED_VARIANTS_PARENT1_.value
parent1_options.proposed_variants_filename = (
PROPOSED_VARIANTS_PARENT1_.value
)
if PROPOSED_VARIANTS_PARENT2_.value:
parent2_options.proposed_variants_filename = PROPOSED_VARIANTS_PARENT2_.value
parent2_options.proposed_variants_filename = (
PROPOSED_VARIANTS_PARENT2_.value
)

if DOWNSAMPLE_FRACTION_CHILD_.value != NO_DOWNSAMPLING:
child_options.downsample_fraction = DOWNSAMPLE_FRACTION_CHILD_.value
Expand All @@ -209,7 +274,9 @@ def trio_samples_from_flags(add_flags=True, flags_obj=None):
if PILEUP_IMAGE_HEIGHT_CHILD_.value:
child_options.pileup_height = PILEUP_IMAGE_HEIGHT_CHILD_.value
if PILEUP_IMAGE_HEIGHT_PARENT_.value:
parent1_options.pileup_height = parent2_options.pileup_height = PILEUP_IMAGE_HEIGHT_PARENT_.value
parent1_options.pileup_height = (
parent2_options.pileup_height
) = PILEUP_IMAGE_HEIGHT_PARENT_.value

if SAMPLE_NAME_TO_TRAIN_.value:
if SAMPLE_NAME_TO_TRAIN_.value == SAMPLE_NAME_.value:
Expand All @@ -218,9 +285,13 @@ def trio_samples_from_flags(add_flags=True, flags_obj=None):
sample_role_to_train = parent1_options.role
else:
errors.log_and_raise(
'--sample_name_to_train must match either --sample_name or '
'--sample_name_parent1, or it can be unset to default to '
'--sample_name.', errors.CommandLineError)
(
'--sample_name_to_train must match either --sample_name or '
'--sample_name_parent1, or it can be unset to default to '
'--sample_name.'
),
errors.CommandLineError,
)

# Ordering here determines the default order of samples, and when a sample
# above has a custom .order, then this is the list those indices refer to.
Expand Down Expand Up @@ -248,22 +319,36 @@ def default_options(add_flags=True, flags_obj=None):
flags_obj = FLAGS

samples_in_order, sample_role_to_train = trio_samples_from_flags(
add_flags=add_flags, flags_obj=flags_obj)
add_flags=add_flags, flags_obj=flags_obj
)

options = make_examples_options.shared_flags_to_options(
add_flags=add_flags,
flags_obj=flags_obj,
samples_in_order=samples_in_order,
sample_role_to_train=sample_role_to_train,
main_sample_index=MAIN_SAMPLE_INDEX)
main_sample_index=MAIN_SAMPLE_INDEX,
)

if add_flags:
options.bam_fname = os.path.basename(
READS_.value) + '|' + (os.path.basename(READS_PARENT1_.value)
if READS_PARENT1_.value else 'None') + '|' + (
os.path.basename(READS_PARENT2_.value)
if READS_PARENT2_.value else 'None')
options.pic_options.sequencing_type = deepvariant_pb2.PileupImageOptions.TRIO
options.bam_fname = (
os.path.basename(READS_.value)
+ '|'
+ (
os.path.basename(READS_PARENT1_.value)
if READS_PARENT1_.value
else 'None'
)
+ '|'
+ (
os.path.basename(READS_PARENT2_.value)
if READS_PARENT2_.value
else 'None'
)
)
options.pic_options.sequencing_type = (
deepvariant_pb2.PileupImageOptions.TRIO
)
if not options.pic_options.height:
options.pic_options.height = dt_constants.PILEUP_DEFAULT_HEIGHT
if not options.pic_options.width:
Expand All @@ -277,24 +362,32 @@ def check_options_are_valid(options):

# Check for general flags (shared for DeepVariant and DeepTrio).
make_examples_options.check_options_are_valid(
options, main_sample_index=MAIN_SAMPLE_INDEX)
options, main_sample_index=MAIN_SAMPLE_INDEX
)

child = options.sample_options[MAIN_SAMPLE_INDEX]

# Sanity check the sample_names (specific to trio).
if (child.variant_caller_options.sample_name == FLAGS.sample_name_parent1 or
child.variant_caller_options.sample_name == FLAGS.sample_name_parent2):
if (
child.variant_caller_options.sample_name == FLAGS.sample_name_parent1
or child.variant_caller_options.sample_name == FLAGS.sample_name_parent2
):
errors.log_and_raise(
'The sample_name of the child is the same as one of '
'the parents.', errors.CommandLineError)
'The sample_name of the child is the same as one of the parents.',
errors.CommandLineError,
)

if options.pic_options.alt_aligned_pileup == 'rows':
errors.log_and_raise('--alt_aligned_pileup="rows" cannot be used with '
'DeepTrio because the pileup images would become '
'too tall for InceptionV3.')
errors.log_and_raise(
'--alt_aligned_pileup="rows" cannot be used with '
'DeepTrio because the pileup images would become '
'too tall for InceptionV3.'
)

if (options.mode == deepvariant_pb2.MakeExamplesOptions.CANDIDATE_SWEEP and
child.candidate_positions is None):
if (
options.mode == deepvariant_pb2.MakeExamplesOptions.CANDIDATE_SWEEP
and child.candidate_positions is None
):
errors.log_and_raise(
'--candidate_positions is required when --positions_sweep_mode is set.'
)
Expand All @@ -306,7 +399,9 @@ def main(argv=()):
errors.log_and_raise(
'Command line parsing failure: make_examples does not accept '
'positional arguments but some are present on the command line: '
'"{}".'.format(str(argv)), errors.CommandLineError)
'"{}".'.format(str(argv)),
errors.CommandLineError,
)
del argv # Unused.

proto_utils.uses_fast_cpp_protos_or_die()
Expand Down
Loading

0 comments on commit f068215

Please sign in to comment.