diff --git a/genes/hgvs/biocommons_hgvs/hgvs_converter_biocommons.py b/genes/hgvs/biocommons_hgvs/hgvs_converter_biocommons.py
index cc4e97295..fe8c35b9b 100644
--- a/genes/hgvs/biocommons_hgvs/hgvs_converter_biocommons.py
+++ b/genes/hgvs/biocommons_hgvs/hgvs_converter_biocommons.py
@@ -135,11 +135,7 @@ def __init__(self, genome_build: GenomeBuild, local_resolution=True, clingen_res
     def _parser_hgvs(hgvs_string: str) -> SequenceVariant:
         """ All calls to parsing go through here """
 
-        if "ins" in hgvs_string:
-            if re.match(".*ins\d+$", hgvs_string):
-                raise HGVSException("Insertions require inserted sequence, not an integer length")
-            if re.match(".*ins$", hgvs_string):
-                raise HGVSException("Insertions require inserted sequence")
+        HGVSConverter._hgvs_string_validation(hgvs_string)
 
         # Biocommons HGVS doesn't accept integers on the end of indels - ie NM_001354689.1(RAF1):c.1_2dup3
         # We want to strip these and raise an error if the span is wrong
diff --git a/genes/hgvs/hgvs_converter.py b/genes/hgvs/hgvs_converter.py
index 0df017bbd..7ced4ee01 100644
--- a/genes/hgvs/hgvs_converter.py
+++ b/genes/hgvs/hgvs_converter.py
@@ -1,7 +1,8 @@
 import abc
+import re
 from enum import Enum
 
-from genes.hgvs import HGVSVariant
+from genes.hgvs import HGVSVariant, HGVSException
 from snpdb.models import GenomeBuild, VariantCoordinate
 
 
@@ -53,6 +54,20 @@ def __init__(self, genome_build: GenomeBuild, local_resolution=True, clingen_res
         self.local_resolution = local_resolution
         self.clingen_resolution = clingen_resolution
 
+    @staticmethod
+    def _hgvs_string_validation(hgvs_string: str) -> None:
+        """ Checks shared by every converter, run before the library-specific parser
+
+            :param hgvs_string: HGVS string as supplied by the caller
+            :raises HGVSException: string ends in 'ins' (no inserted sequence) or in
+                                   'ins' followed by digits (a length, not a sequence)
+        """
+        if "ins" in hgvs_string:
+            # Raw strings - "\d" inside a normal string literal is an invalid escape sequence
+            if re.match(r".*ins\d+$", hgvs_string):
+                raise HGVSException("Insertions require inserted sequence, not an integer length")
+            if re.match(r".*ins$", hgvs_string):
+                raise HGVSException("Insertions require inserted sequence")
+
     @abc.abstractmethod
     def create_hgvs_variant(self, hgvs_string: str) -> HGVSVariant:
         pass
diff --git a/genes/hgvs/pyhgvs/hgvs_converter_pyhgvs.py b/genes/hgvs/pyhgvs/hgvs_converter_pyhgvs.py
index de638ffbb..8d845f26d 100644
--- a/genes/hgvs/pyhgvs/hgvs_converter_pyhgvs.py
+++ b/genes/hgvs/pyhgvs/hgvs_converter_pyhgvs.py
@@ -126,6 +126,8 @@ class PyHGVSConverter(HGVSConverter):
     @staticmethod
     def _hgvs_name(hgvs_string):
         """ Catches PyHGVS specific exceptions and converts to HGVSException """
+
+        HGVSConverter._hgvs_string_validation(hgvs_string)
         try:
             return HGVSName(hgvs_string)
         except pyhgvs.InvalidHGVSName as e:
diff --git a/genes/tests/test_hgvs.py b/genes/tests/test_hgvs.py
index e247fe2b9..3ba661a6d 100644
--- a/genes/tests/test_hgvs.py
+++ b/genes/tests/test_hgvs.py
@@ -282,3 +282,23 @@ def _test_mitochondria_hgvs(self, hgvs_converter_type: HGVSConverterType):
 
         lib_hgvs_string = matcher.hgvs_converter.variant_coordinate_to_g_hgvs(vc)
         self.assertTrue("m." in str(lib_hgvs_string), "HGVS library conversion")
+
+    def test_biocommons_invalid_trailing_int(self):
+        return self._test_invalid_trailing_int(HGVSConverterType.BIOCOMMONS_HGVS)
+
+    def test_pyhgvs_invalid_trailing_int(self):
+        return self._test_invalid_trailing_int(HGVSConverterType.PYHGVS)
+
+    def _test_invalid_trailing_int(self, hgvs_converter_type: HGVSConverterType):
+        """ ins / delins ending in an integer instead of a sequence must raise HGVSException """
+        bad_examples = [
+            "NM_000441.2(SLC26A4):c.1246_2341ins23",
+            "NM_003194.4(TBP):c.223_281delins50",
+        ]
+
+        matcher = HGVSMatcher(GenomeBuild.grch37(), hgvs_converter_type=hgvs_converter_type)
+        for hgvs_string in bad_examples:
+            # subTest so a failure reports which example did not raise
+            with self.subTest(hgvs_string=hgvs_string):
+                with self.assertRaises(HGVSException):
+                    matcher.get_variant_coordinate(hgvs_string)