Skip to content

Commit

Permalink
updates that start to address #518 and #503
Browse files Browse the repository at this point in the history
  • Loading branch information
Peter-J-Freeman committed Aug 10, 2023
1 parent 695bab9 commit c8b472b
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 8 deletions.
54 changes: 48 additions & 6 deletions VariantValidator/modules/mappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,10 +413,31 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version
test = validator.hp.parse_hgvs_variant(quibble_input)
if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \
post_var.posedit.pos.end.base != test.posedit.pos.end.base:
caution = "ExonBoundaryError: Position c.%s does not correspond with an exon boundary for transcript " \
"%s" % (str(test.posedit.pos).split("_")[-1], test.ac)

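# If this is a boundary issue with a valid boundary stated, but incorrect intronic numbering, we can
# autocorrect it: the ExonBoundaryError warning is still recorded, but MappersError is not raised.
# Refer to https://github.com/openvar/variantValidator/issues/518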
can_we_autocorrect = False
if post_var.posedit.pos.start.base != test.posedit.pos.start.base:
if "-" in str(test.posedit.pos.start) and "+" in str(post_var.posedit.pos.start) and \
post_var.posedit.pos.start.base == test.posedit.pos.start.base - 1:
can_we_autocorrect = True
elif "+" in str(test.posedit.pos.start) and "-" in str(post_var.posedit.pos.start) and \
post_var.posedit.pos.start.base == test.posedit.pos.start.base + 1:
can_we_autocorrect = True
caution = "ExonBoundaryError: Position c.%s does not correspond with an exon boundary for " \
"transcript %s" % (str(test.posedit.pos).split("_")[0], test.ac)
elif post_var.posedit.pos.end.base != test.posedit.pos.end.base:
if "-" in str(test.posedit.pos.end) and "+" in str(post_var.posedit.pos.end) and \
post_var.posedit.pos.end.base == test.posedit.pos.end.base - 1:
can_we_autocorrect = True
elif "+" in str(test.posedit.pos.end) and "-" in str(post_var.posedit.pos.end) and \
post_var.posedit.pos.end.base == test.posedit.pos.end.base + 1:
can_we_autocorrect = True
caution = "ExonBoundaryError: Position c.%s does not correspond with an exon boundary for " \
"transcript %s" % (str(test.posedit.pos).split("_")[0], test.ac)
variant.warnings.extend([caution])
raise MappersError(caution)
if can_we_autocorrect is False:
raise MappersError(caution)

else: # del not in formatted_variant

Expand All @@ -431,10 +452,31 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version
test = validator.hp.parse_hgvs_variant(quibble_input)
if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \
post_var.posedit.pos.end.base != test.posedit.pos.end.base:
caution = "ExonBoundaryError: Position c.%s does not correspond with an exon boundary for transcript " \
"%s" % (str(test.posedit.pos).split("_")[-1], test.ac)

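# If this is a boundary issue with a valid boundary stated, but incorrect intronic numbering, we can
# autocorrect it: the ExonBoundaryError warning is still recorded, but MappersError is not raised.
# Refer to https://github.com/openvar/variantValidator/issues/518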
can_we_autocorrect = False
if post_var.posedit.pos.start.base != test.posedit.pos.start.base:
if "-" in str(test.posedit.pos.start) and "+" in str(post_var.posedit.pos.start) and \
post_var.posedit.pos.start.base == test.posedit.pos.start.base - 1:
can_we_autocorrect = True
elif "+" in str(test.posedit.pos.start) and "-" in str(post_var.posedit.pos.start) and \
post_var.posedit.pos.start.base == test.posedit.pos.start.base + 1:
can_we_autocorrect = True
caution = "ExonBoundaryError: Position c.%s does not correspond with an exon boundary for " \
"transcript %s" % (str(test.posedit.pos).split("_")[0], test.ac)
elif post_var.posedit.pos.end.base != test.posedit.pos.end.base:
if "-" in str(test.posedit.pos.end) and "+" in str(post_var.posedit.pos.end) and \
post_var.posedit.pos.end.base == test.posedit.pos.end.base - 1:
can_we_autocorrect = True
elif "+" in str(test.posedit.pos.end) and "-" in str(post_var.posedit.pos.end) and \
post_var.posedit.pos.end.base == test.posedit.pos.end.base + 1:
can_we_autocorrect = True
caution = "ExonBoundaryError: Position c.%s does not correspond with an exon boundary for " \
"transcript %s" % (str(test.posedit.pos).split("_")[0], test.ac)
variant.warnings.extend([caution])
raise MappersError(caution)
if can_we_autocorrect is False:
raise MappersError(caution)

elif ':g.' not in quibble_input:
query = validator.hp.parse_hgvs_variant(formatted_variant)
Expand Down
4 changes: 4 additions & 0 deletions VariantValidator/modules/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,10 @@ def translate(ed_seq, cds_start, modified_aa=None):
continue

trans = "".join(translation)

print("It translates to")
print(trans)

aain = list(trans)
aaout = []
count = 0
Expand Down
8 changes: 8 additions & 0 deletions VariantValidator/modules/vvMixinCore.py
Original file line number Diff line number Diff line change
Expand Up @@ -1159,16 +1159,24 @@ def validate(self,

# Remove LRG
format_p = predicted_protein_variant_dict['tlr']

print("SEE LRG")
print(format_p)

if 'LRG' in format_p:
print(format_p)
format_lrg = copy.copy(format_p)
format_p = re.sub(r'\(LRG_.+?\)', '', format_p)
print(format_p)
format_lrg = format_lrg.split('(')[1]
format_lrg = format_lrg.replace(')', '')
print(format_lrg)
else:
format_lrg = None
pass

re_parse_protein = self.hp.parse_hgvs_variant(format_p)
print(re_parse_protein)

# Set formatted tlr
predicted_protein_variant_dict['tlr'] = str(copy.copy(re_parse_protein))
Expand Down
7 changes: 6 additions & 1 deletion VariantValidator/modules/vvMixinInit.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,9 +247,12 @@ def myc_to_p(self, hgvs_transcript, evm, re_to_p, hn):

if hgvs_transcript.type == 'c':
# Handle non inversions with simple c_to_p mapping
print("TYPE")
print(hgvs_transcript.posedit.edit.type)

if (hgvs_transcript.posedit.edit.type != 'inv') and (hgvs_transcript.posedit.edit.type != 'dup') and \
(hgvs_transcript.posedit.edit.type != 'delins')and (re_to_p is False):
(hgvs_transcript.posedit.edit.type != 'delins') and (hgvs_transcript.posedit.edit.type != 'sub') \
and (re_to_p is False):
hgvs_protein = None
# Does the edit affect the start codon?
if ((1 <= hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0)
Expand Down Expand Up @@ -318,6 +321,8 @@ def myc_to_p(self, hgvs_transcript, evm, re_to_p, hn):
inv_seq = ''
elif 'dup' in hgvs_transcript.posedit.edit.type:
inv_seq = del_seq + del_seq
elif 'sub' in hgvs_transcript.posedit.edit.type:
inv_seq = hgvs_transcript.posedit.edit.alt

shifts = ''
# Look for p. delins or del
Expand Down
12 changes: 11 additions & 1 deletion tests/test_warnings.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,16 @@ def test_issue_455(self):
"NP_000483.3:p.? is HGVS compliant and contains a valid reference amino acid description"
]

def test_issue_518a(self):
    """Check the warnings returned for c.791-802_1056+1445del (issue 518).

    The submitted start position is written as c.791-802; the result is
    looked up under c.790+532_1056+1445del and must carry both the
    gene-symbol removal notice and the ExonBoundaryError warning.
    """
    expected_warnings = [
        "Removing redundant gene symbol CLN3 from variant description",
        "ExonBoundaryError: Position c.791-802 does not correspond with an exon boundary for transcript NM_000086.2"
    ]

    validated = self.vv.validate('NM_000086.2(CLN3):c.791-802_1056+1445del', 'GRCh38', 'all')
    output = validated.format_as_dict(test=True)

    # Dump the full result so a failing run shows what was actually returned.
    print(output)
    record = output['NM_000086.2:c.790+532_1056+1445del']
    assert record['validation_warnings'] == expected_warnings


class TestVFGapWarnings(TestCase):

Expand Down Expand Up @@ -772,7 +782,7 @@ def test_vv_series_16(self):
variant = 'NM_207122.2:c.1174_1174+1insAT'
results = self.vv.validate(variant, 'GRCh38', 'all').format_as_dict(test=True)
print(results)
assert "ExonBoundaryError: Position c.1174+1 does not correspond with an exon boundary for transcript NM_207122.2" in \
assert "ExonBoundaryError: Position c.1174 does not correspond with an exon boundary for transcript NM_207122.2" in \
results['validation_warning_1']['validation_warnings']

def test_vv_series_17(self):
Expand Down

0 comments on commit c8b472b

Please sign in to comment.