Skip to content

Commit

Permalink
issue SACGF/variantgrid_private#2027 - do HGVS trailing integer valid…
Browse files Browse the repository at this point in the history
…ation (del50) for pyhgvs as well as biocommons
  • Loading branch information
davmlaw committed Jul 11, 2024
1 parent d9cb0bb commit c713b10
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 6 deletions.
6 changes: 1 addition & 5 deletions genes/hgvs/biocommons_hgvs/hgvs_converter_biocommons.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,7 @@ def __init__(self, genome_build: GenomeBuild, local_resolution=True, clingen_res
def _parser_hgvs(hgvs_string: str) -> SequenceVariant:
""" All calls to parsing go through here """

if "ins" in hgvs_string:
if re.match(".*ins\d+$", hgvs_string):
raise HGVSException("Insertions require inserted sequence, not an integer length")
if re.match(".*ins$", hgvs_string):
raise HGVSException("Insertions require inserted sequence")
HGVSConverter._hgvs_string_validation(hgvs_string)

# Biocommons HGVS doesn't accept integers on the end of indels - ie NM_001354689.1(RAF1):c.1_2dup3
# We want to strip these and raise an error if the span is wrong
Expand Down
15 changes: 14 additions & 1 deletion genes/hgvs/hgvs_converter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import abc
import re
from enum import Enum

from genes.hgvs import HGVSVariant
from genes.hgvs import HGVSVariant, HGVSException
from snpdb.models import GenomeBuild, VariantCoordinate


Expand Down Expand Up @@ -53,6 +54,18 @@ def __init__(self, genome_build: GenomeBuild, local_resolution=True, clingen_res
self.local_resolution = local_resolution
self.clingen_resolution = clingen_resolution

@staticmethod
def _hgvs_string_validation(hgvs_string: str) -> None:
    """ Checks made on the HGVS string itself, independent of the HGVS library used

        Rejects strings that end in "ins" (no inserted sequence), or in "ins" followed
        only by digits (an integer length instead of the inserted sequence).
        As "delins" ends with "ins", strings such as "...delins50" are rejected as well.

        :param hgvs_string: HGVS string to check
        :raises HGVSException: if the string ends in "ins" or "ins<digits>"
    """

    if "ins" in hgvs_string:
        # Raw strings - "\d" in a normal string literal is an invalid escape sequence
        if re.match(r".*ins\d+$", hgvs_string):
            raise HGVSException("Insertions require inserted sequence, not an integer length")
        if re.match(r".*ins$", hgvs_string):
            raise HGVSException("Insertions require inserted sequence")



@abc.abstractmethod
def create_hgvs_variant(self, hgvs_string: str) -> HGVSVariant:
pass
Expand Down
2 changes: 2 additions & 0 deletions genes/hgvs/pyhgvs/hgvs_converter_pyhgvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ class PyHGVSConverter(HGVSConverter):
@staticmethod
def _hgvs_name(hgvs_string):
""" Catches PyHGVS specific exceptions and converts to HGVSException """

HGVSConverter._hgvs_string_validation(hgvs_string)
try:
return HGVSName(hgvs_string)
except pyhgvs.InvalidHGVSName as e:
Expand Down
18 changes: 18 additions & 0 deletions genes/tests/test_hgvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,3 +282,21 @@ def _test_mitochondria_hgvs(self, hgvs_converter_type: HGVSConverterType):

lib_hgvs_string = matcher.hgvs_converter.variant_coordinate_to_g_hgvs(vc)
self.assertTrue("m." in str(lib_hgvs_string), "HGVS library conversion")

def test_biocommons_invalid_trailing_int(self):
    """ Runs the shared invalid trailing integer checks with the Biocommons HGVS converter """
    converter_type = HGVSConverterType.BIOCOMMONS_HGVS
    return self._test_invalid_trailing_int(converter_type)

def test_pyhgvs_invalid_trailing_int(self):
    """ Runs the shared invalid trailing integer checks with the PyHGVS converter """
    converter_type = HGVSConverterType.PYHGVS
    return self._test_invalid_trailing_int(converter_type)

def _test_invalid_trailing_int(self, hgvs_converter_type: HGVSConverterType):
    """ HGVS strings giving an integer in place of the inserted sequence must raise
        HGVSException when resolved to a variant coordinate with the given converter

        :param hgvs_converter_type: which HGVS converter the HGVSMatcher is built with
    """
    bad_examples = [
        "NM_000441.2(SLC26A4):c.1246_2341ins23",
        "NM_003194.4(TBP):c.223_281delins50",
    ]

    matcher = HGVSMatcher(GenomeBuild.grch37(), hgvs_converter_type=hgvs_converter_type)
    for hgvs_string in bad_examples:
        # subTest reports which example failed, and still runs the remaining examples
        with self.subTest(hgvs_string=hgvs_string):
            with self.assertRaises(HGVSException):
                matcher.get_variant_coordinate(hgvs_string)

0 comments on commit c713b10

Please sign in to comment.