updates that start to address #518 and #503

openvar · Aug 10, 2023 · c8b472b · c8b472b
1 parent 695bab9
commit c8b472b
Show file tree

Hide file tree

Showing 5 changed files with 77 additions and 8 deletions.
diff --git a/VariantValidator/modules/mappers.py b/VariantValidator/modules/mappers.py
@@ -413,10 +413,31 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version
             test = validator.hp.parse_hgvs_variant(quibble_input)
             if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \
                     post_var.posedit.pos.end.base != test.posedit.pos.end.base:
-                caution = "ExonBoundaryError: Position c.%s does not correspond with an exon boundary for transcript " \
-                          "%s" % (str(test.posedit.pos).split("_")[-1], test.ac)
+
+                # If this is a boundary issue with a valid boundary stated, but incorrect intronic numbering, we can
+                # autocorrect: warn but do not raise. Refer to https://github.com/openvar/variantValidator/issues/518
+                can_we_autocorrect = False
+                if post_var.posedit.pos.start.base != test.posedit.pos.start.base:
+                    if "-" in str(test.posedit.pos.start) and "+" in str(post_var.posedit.pos.start) and \
+                            post_var.posedit.pos.start.base == test.posedit.pos.start.base - 1:
+                        can_we_autocorrect = True
+                    elif "+" in str(test.posedit.pos.start) and "-" in str(post_var.posedit.pos.start) and \
+                            post_var.posedit.pos.start.base == test.posedit.pos.start.base + 1:
+                        can_we_autocorrect = True
+                    caution = "ExonBoundaryError: Position c.%s does not correspond with an exon boundary for " \
+                              "transcript %s" % (str(test.posedit.pos).split("_")[0], test.ac)
+                elif post_var.posedit.pos.end.base != test.posedit.pos.end.base:
+                    if "-" in str(test.posedit.pos.end) and "+" in str(post_var.posedit.pos.end) and \
+                            post_var.posedit.pos.end.base == test.posedit.pos.end.base - 1:
+                        can_we_autocorrect = True
+                    elif "+" in str(test.posedit.pos.end) and "-" in str(post_var.posedit.pos.end) and \
+                            post_var.posedit.pos.end.base == test.posedit.pos.end.base + 1:
+                        can_we_autocorrect = True
+                    caution = "ExonBoundaryError: Position c.%s does not correspond with an exon boundary for " \
+                              "transcript %s" % (str(test.posedit.pos).split("_")[0], test.ac)
                 variant.warnings.extend([caution])
-                raise MappersError(caution)
+                if can_we_autocorrect is False:
+                    raise MappersError(caution)
 
         else:  # del not in formatted_variant
 
@@ -431,10 +452,31 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version
             test = validator.hp.parse_hgvs_variant(quibble_input)
             if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \
                     post_var.posedit.pos.end.base != test.posedit.pos.end.base:
-                caution = "ExonBoundaryError: Position c.%s does not correspond with an exon boundary for transcript " \
-                          "%s" % (str(test.posedit.pos).split("_")[-1], test.ac)
+
+                # If this is a boundary issue with a valid boundary stated, but incorrect intronic numbering, we can
+                # autocorrect: warn but do not raise. Refer to https://github.com/openvar/variantValidator/issues/518
+                can_we_autocorrect = False
+                if post_var.posedit.pos.start.base != test.posedit.pos.start.base:
+                    if "-" in str(test.posedit.pos.start) and "+" in str(post_var.posedit.pos.start) and \
+                            post_var.posedit.pos.start.base == test.posedit.pos.start.base - 1:
+                        can_we_autocorrect = True
+                    elif "+" in str(test.posedit.pos.start) and "-" in str(post_var.posedit.pos.start) and \
+                            post_var.posedit.pos.start.base == test.posedit.pos.start.base + 1:
+                        can_we_autocorrect = True
+                    caution = "ExonBoundaryError: Position c.%s does not correspond with an exon boundary for " \
+                              "transcript %s" % (str(test.posedit.pos).split("_")[0], test.ac)
+                elif post_var.posedit.pos.end.base != test.posedit.pos.end.base:
+                    if "-" in str(test.posedit.pos.end) and "+" in str(post_var.posedit.pos.end) and \
+                            post_var.posedit.pos.end.base == test.posedit.pos.end.base - 1:
+                        can_we_autocorrect = True
+                    elif "+" in str(test.posedit.pos.end) and "-" in str(post_var.posedit.pos.end) and \
+                            post_var.posedit.pos.end.base == test.posedit.pos.end.base + 1:
+                        can_we_autocorrect = True
+                    caution = "ExonBoundaryError: Position c.%s does not correspond with an exon boundary for " \
+                              "transcript %s" % (str(test.posedit.pos).split("_")[0], test.ac)
                 variant.warnings.extend([caution])
-                raise MappersError(caution)
+                if can_we_autocorrect is False:
+                    raise MappersError(caution)
 
     elif ':g.' not in quibble_input:
         query = validator.hp.parse_hgvs_variant(formatted_variant)

diff --git a/VariantValidator/modules/utils.py b/VariantValidator/modules/utils.py
@@ -520,6 +520,10 @@ def translate(ed_seq, cds_start, modified_aa=None):
                     continue
 
         trans = "".join(translation)
+
+        print("It translates to")
+        print(trans)
+
         aain = list(trans)
         aaout = []
         count = 0

diff --git a/VariantValidator/modules/vvMixinCore.py b/VariantValidator/modules/vvMixinCore.py
@@ -1159,16 +1159,24 @@ def validate(self,
 
                             # Remove LRG
                             format_p = predicted_protein_variant_dict['tlr']
+
+                            print("SEE LRG")
+                            print(format_p)
+
                             if 'LRG' in format_p:
+                                print(format_p)
                                 format_lrg = copy.copy(format_p)
                                 format_p = re.sub(r'\(LRG_.+?\)', '', format_p)
+                                print(format_p)
                                 format_lrg = format_lrg.split('(')[1]
                                 format_lrg = format_lrg.replace(')', '')
+                                print(format_lrg)
                             else:
                                 format_lrg = None
                                 pass
 
                             re_parse_protein = self.hp.parse_hgvs_variant(format_p)
+                            print(re_parse_protein)
 
                             # Set formatted tlr
                             predicted_protein_variant_dict['tlr'] = str(copy.copy(re_parse_protein))

diff --git a/VariantValidator/modules/vvMixinInit.py b/VariantValidator/modules/vvMixinInit.py
@@ -247,9 +247,12 @@ def myc_to_p(self, hgvs_transcript, evm, re_to_p, hn):
 
         if hgvs_transcript.type == 'c':
             # Handle non inversions with simple c_to_p mapping
+            print("TYPE")
+            print(hgvs_transcript.posedit.edit.type)
 
             if (hgvs_transcript.posedit.edit.type != 'inv') and (hgvs_transcript.posedit.edit.type != 'dup') and \
-                    (hgvs_transcript.posedit.edit.type != 'delins')and (re_to_p is False):
+                    (hgvs_transcript.posedit.edit.type != 'delins') and (hgvs_transcript.posedit.edit.type != 'sub') \
+                        and (re_to_p is False):
                 hgvs_protein = None
                 # Does the edit affect the start codon?
                 if ((1 <= hgvs_transcript.posedit.pos.start.base <= 3 and hgvs_transcript.posedit.pos.start.offset == 0)
@@ -318,6 +321,8 @@ def myc_to_p(self, hgvs_transcript, evm, re_to_p, hn):
                         inv_seq = ''
                 elif 'dup' in hgvs_transcript.posedit.edit.type:
                     inv_seq = del_seq + del_seq
+                elif 'sub' in hgvs_transcript.posedit.edit.type:
+                    inv_seq = hgvs_transcript.posedit.edit.alt
 
                 shifts = ''
                 # Look for p. delins or del

diff --git a/tests/test_warnings.py b/tests/test_warnings.py
@@ -396,6 +396,16 @@ def test_issue_455(self):
             "NP_000483.3:p.? is HGVS compliant and contains a valid reference amino acid description"
         ]
 
+    def test_issue_518a(self):
+        variant = 'NM_000086.2(CLN3):c.791-802_1056+1445del'
+        results = self.vv.validate(variant, 'GRCh38', 'all').format_as_dict(test=True)
+
+        print(results)
+        assert results['NM_000086.2:c.790+532_1056+1445del']['validation_warnings'] == [
+            "Removing redundant gene symbol CLN3 from variant description",
+            "ExonBoundaryError: Position c.791-802 does not correspond with an exon boundary for transcript NM_000086.2"
+        ]
+
 
 class TestVFGapWarnings(TestCase):
 
@@ -772,7 +782,7 @@ def test_vv_series_16(self):
         variant = 'NM_207122.2:c.1174_1174+1insAT'
         results = self.vv.validate(variant, 'GRCh38', 'all').format_as_dict(test=True)
         print(results)
-        assert "ExonBoundaryError: Position c.1174+1 does not correspond with an exon boundary for transcript NM_207122.2" in \
+        assert "ExonBoundaryError: Position c.1174 does not correspond with an exon boundary for transcript NM_207122.2" in \
                results['validation_warning_1']['validation_warnings']
 
     def test_vv_series_17(self):