From dcb8c4701e826a9f64b258e59c01cac706e7675e Mon Sep 17 00:00:00 2001 From: Mihai Lefter Date: Wed, 2 Aug 2023 15:11:49 +0200 Subject: [PATCH] Other ambiguities --- mutalyzer_hgvs_parser/hgvs_parser.py | 164 ++++++++++++++++++++++++++- tests/test_convert.py | 56 ++++++++- 2 files changed, 216 insertions(+), 4 deletions(-) diff --git a/mutalyzer_hgvs_parser/hgvs_parser.py b/mutalyzer_hgvs_parser/hgvs_parser.py index 973c574..08a8f5c 100644 --- a/mutalyzer_hgvs_parser/hgvs_parser.py +++ b/mutalyzer_hgvs_parser/hgvs_parser.py @@ -257,8 +257,8 @@ and data_equals(children, [0, 1], "repeat") and data_equals(children, [1, 0], "location") and data_equals(children, [1, 1], "repeat") + and len(get_child(children, [2]).children) == 1 and data_equals(children, [2, 0], "location") - and len(get_child(children, [2, 0]).children) == 1 ), "selected": 2, }, @@ -295,6 +295,164 @@ ), "selected": 0, }, + { + "type": "variant_certain_deletion | variant_certain_repeat - deletion", + # R1:c.10-5_10-2delR2:10del + "conditions": lambda children: ( + len(children) == 2 + and children[0].data == children[1].data == "variant_certain" + and len(get_child(children, [0]).children) == 2 + and data_equals(children, [0, 0], "location") + and data_equals(children, [0, 1], "deletion") + and len(get_child(children, [1]).children) == 2 + and data_equals(children, [1, 0], "location") + and data_equals(children, [1, 1], "repeat") + + and len(get_child(children, [0, 1]).children) == 1 + and data_equals(children, [0, 1, 0], "inserted") + + ), + "selected": 0, + }, + { + "type": "variant_certain_delins | variant_certain_delins - one insert", + # R1:c.10-5_10-2delinsTCTR2.2:c.10insT + "conditions": lambda children: ( + len(children) == 2 + and children[0].data == children[1].data == "deletion_insertion" + and len(get_child(children, [1]).children) == 1 + and data_equals(children, [0, 0], "inserted") + ), + "selected": 1 + }, + # TODO: revisit the next ones in the repeats context. + { + "type": "variant_certain_repeat | variant_certain_repeat_length - length 0", + # R1:c.10-2[5] + "conditions": lambda children: ( + len(children) == 2 + and children[0].data == children[1].data == "variant_certain" + and len(get_child(children, [0]).children) == 2 + and data_equals(children, [0, 0], "location") + and data_equals(children, [0, 1], "repeat") + + and len(get_child(children, [1]).children) == 2 + and data_equals(children, [1, 0], "location") + and data_equals(children, [1, 1], "repeat") + + and len(get_child(children, [0, 1]).children) == 1 + and data_equals(children, [0, 1, 0], "inserted") + and len(get_child(children, [0, 1, 0]).children) == 1 + and data_equals(children, [0, 1, 0, 0], "insert") + and len(get_child(children, [0, 1, 0, 0]).children) == 1 + and data_equals(children, [0, 1, 0, 0, 0], "length") + ), + "selected": 0 + }, + { + "type": "variant_certain_repeat | variant_certain_repeat_length - length 1", + # R1:c.10-2[5] + "conditions": lambda children: ( + len(children) == 2 + and children[0].data == children[1].data == "variant_certain" + and len(get_child(children, [0]).children) == 2 + and data_equals(children, [0, 0], "location") + and data_equals(children, [0, 1], "repeat") + + and len(get_child(children, [1]).children) == 2 + and data_equals(children, [1, 0], "location") + and data_equals(children, [1, 1], "repeat") + + and len(get_child(children, [1, 1]).children) == 1 + and data_equals(children, [1, 1, 0], "inserted") + and len(get_child(children, [1, 1, 0]).children) == 1 + and data_equals(children, [1, 1, 0, 0], "insert") + and len(get_child(children, [1, 1, 0, 0]).children) == 1 + and data_equals(children, [1, 1, 0, 0, 0], "length") + ), + "selected": 1 + }, + { + "type": "variant_certain_repeat | variant_certain_repeat_range_length - length 0", + # R1:c.10-2_10-4[5] + "conditions": lambda children: ( + len(children) == 3 + and children[0].data == children[1].data == "variant_certain" + and len(get_child(children, [0]).children) == 2 + and data_equals(children, [0, 0], "location") + and data_equals(children, [0, 1], "repeat") + + and len(get_child(children, [1]).children) == 2 + and data_equals(children, [1, 0], "location") + and data_equals(children, [1, 1], "repeat") + + and len(get_child(children, [2]).children) == 2 + and data_equals(children, [2, 0], "location") + and data_equals(children, [2, 1], "repeat") + + and len(get_child(children, [0, 1]).children) == 1 + and data_equals(children, [0, 1, 0], "inserted") + and len(get_child(children, [0, 1, 0]).children) == 1 + and data_equals(children, [0, 1, 0, 0], "insert") + and len(get_child(children, [0, 1, 0, 0]).children) == 1 + and data_equals(children, [0, 1, 0, 0, 0], "length") + ), + "selected": 0 + }, + { + "type": "variant_certain_repeat | variant_certain_repeat_range_length - length 1", + # R1:c.10-2_10-4[5] + "conditions": lambda children: ( + len(children) == 3 + and children[0].data == children[1].data == "variant_certain" + and len(get_child(children, [0]).children) == 2 + and data_equals(children, [0, 0], "location") + and data_equals(children, [0, 1], "repeat") + + and len(get_child(children, [1]).children) == 2 + and data_equals(children, [1, 0], "location") + and data_equals(children, [1, 1], "repeat") + + and len(get_child(children, [2]).children) == 2 + and data_equals(children, [2, 0], "location") + and data_equals(children, [2, 1], "repeat") + + and len(get_child(children, [1, 1]).children) == 1 + and data_equals(children, [1, 1, 0], "inserted") + and len(get_child(children, [1, 1, 0]).children) == 1 + and data_equals(children, [1, 1, 0, 0], "insert") + and len(get_child(children, [1, 1, 0, 0]).children) == 1 + and data_equals(children, [1, 1, 0, 0, 0], "length") + ), + "selected": 1 + }, + { + "type": "variant_certain_repeat | variant_certain_repeat_range_length - length 2", + # R1:c.10-2_10-4[5] + "conditions": lambda children: ( + len(children) == 3 + and children[0].data == children[1].data == "variant_certain" + and len(get_child(children, [0]).children) == 2 + and data_equals(children, [0, 0], "location") + and data_equals(children, [0, 1], "repeat") + + and len(get_child(children, [1]).children) == 2 + and data_equals(children, [1, 0], "location") + and data_equals(children, [1, 1], "repeat") + + and len(get_child(children, [2]).children) == 2 + and data_equals(children, [2, 0], "location") + and data_equals(children, [2, 1], "repeat") + + and len(get_child(children, [2, 1]).children) == 1 + and data_equals(children, [2, 1, 0], "inserted") + and len(get_child(children, [2, 1, 0]).children) == 1 + and data_equals(children, [2, 1, 0, 0], "insert") + and len(get_child(children, [2, 1, 0, 0]).children) == 1 + and data_equals(children, [2, 1, 0, 0, 0], "length") + ), + "selected": 2 + }, ] @@ -303,8 +461,8 @@ def _ambig(self, children): for ambig in AMBIGUITIES: if ambig["conditions"](children): return children[ambig["selected"]] - # from lark.tree import pydot__tree_to_png - # pydot__tree_to_png(Tree("ambig", children), "ambig.png") + from lark.tree import pydot__tree_to_png + pydot__tree_to_png(Tree("ambig", children), "ambig.png") raise Exception("Ambiguity not solved.") diff --git a/tests/test_convert.py b/tests/test_convert.py index c50001e..24d4f5e 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -959,6 +959,53 @@ def test_variants_to_model(variants, model): ] }], }, + "R1:c.10-5_10-2delinsTCTR2.2:c.10": { + "reference": REFERENCES["R1"], + "type": "description_dna", + "coordinate_system": "c", + "variants": [{ + "location": LOCATIONS["10-5_10-2"], + "type": "deletion_insertion", + "source": "reference", + "inserted": [ + { + "type": "description_dna", + "source": {"id": "TCTR2.2"}, + "coordinate_system": "c", + "location": { + "type": "point", + "position": 10 + } + } + ] + }], + }, + # TODO: revisit this in the repeats context. + "R1:c.10-20[5]": { + "reference": REFERENCES["R1"], + "type": "description_dna", + "coordinate_system": "c", + "variants": [{ + "location": LOCATIONS["10-20"], + "type": "repeat", + "source": "reference", + "inserted": [{"length": LENGTHS["5"]} + ] + }], + }, + "R1:c.10-5_10-2[5]": { + "reference": REFERENCES["R1"], + "type": "description_dna", + "coordinate_system": "c", + "variants": [{ + "location": LOCATIONS["10-5_10-2"], + "type": "repeat", + "source": "reference", + "inserted": [{"length": LENGTHS["5"]} + ] + }], + }, + } @@ -1012,7 +1059,14 @@ def test_mix(description, model): @pytest.mark.parametrize( "description", - ["R1:1delinsR2:2del", "R1:1del[R2:2del]", "R1:[1del;10_11insR2:2del]"], + [ + "R1:1delinsR2:2del", + "R1:1del[R2:2del]", + "R1:[1del;10_11insR2:2del]", + "R1:c.10-5_10-2delR2:10del", + "R1:c.10-5_10-2dupR2:10del", + "R1:c.10-5_10-2delinsTCTR2.2:c.10insT", + ], ) def test_nested_descriptions(description): with pytest.raises(NestedDescriptions):