Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: don't redefine Strand model #168

Merged
merged 2 commits into from
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ dependencies = [
"ga4gh.vrs ~=0.8.1",
"biocommons.seqrepo",
"gene-normalizer ~=0.1.40-dev1",
"cool-seq-tool ~=0.5.0",
"cool-seq-tool ~=0.5.1",
]
dynamic=["version"]

Expand Down
3 changes: 1 addition & 2 deletions src/fusor/fusor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from bioutils.accessions import coerce_namespace
from cool_seq_tool.app import CoolSeqTool
from cool_seq_tool.schemas import ResidueMode
from cool_seq_tool.schemas import ResidueMode, Strand
from ga4gh.core import ga4gh_identify
from ga4gh.vrs import models
from ga4gh.vrsatile.pydantic.vrs_models import (
Expand Down Expand Up @@ -42,7 +42,6 @@
MultiplePossibleGenesElement,
RegulatoryClass,
RegulatoryElement,
Strand,
StructuralElementType,
TemplatedSequenceElement,
TranscriptSegmentElement,
Expand Down
10 changes: 2 additions & 8 deletions src/fusor/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from enum import Enum
from typing import Any, Literal

from cool_seq_tool.schemas import Strand
from ga4gh.vrsatile.pydantic import return_value
from ga4gh.vrsatile.pydantic.vrsatile_models import (
CURIE,
Expand Down Expand Up @@ -248,13 +249,6 @@ def validate_sequence(cls, v):
)


class Strand(str, Enum):
"""Define possible values for strand"""

POSITIVE = "+"
NEGATIVE = "-"


class TemplatedSequenceElement(BaseStructuralElement):
"""Define Templated Sequence Element class.
A templated sequence is a contiguous genomic sequence found in the gene
Expand Down Expand Up @@ -286,7 +280,7 @@ class TemplatedSequenceElement(BaseStructuralElement):
},
"label": "chr12:44908821-44908822(+)",
},
"strand": "+",
"strand": 1,
}
},
)
Expand Down
4 changes: 3 additions & 1 deletion src/fusor/nomenclature.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Provide helper methods for fusion nomenclature generation."""

from biocommons.seqrepo.seqrepo import SeqRepo
from cool_seq_tool.schemas import Strand
from ga4gh.vrsatile.pydantic.vrs_models import SequenceLocation

from fusor.exceptions import IDTranslationException
Expand Down Expand Up @@ -98,6 +99,7 @@ def templated_seq_nomenclature(element: TemplatedSequenceElement, sr: SeqRepo) -
:raises ValueError: if location isn't a SequenceLocation or if unable
to retrieve region or location
"""
strand_value = "+" if element.strand == Strand.POSITIVE else "-"
if element.region and element.region.location:
location = element.region.location
if isinstance(location, SequenceLocation):
Expand All @@ -111,7 +113,7 @@ def templated_seq_nomenclature(element: TemplatedSequenceElement, sr: SeqRepo) -
]
except IDTranslationException as e:
raise ValueError from e
return f"{refseq_id.split(':')[1]}(chr {chrom}):g.{start}_{end}({element.strand.value})"
return f"{refseq_id.split(':')[1]}(chr {chrom}):g.{start}_{end}({strand_value})"
raise ValueError
raise ValueError

Expand Down
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ def exhaustive_example(alk_gene_descriptor, braf_gene_descriptor):
},
},
},
"strand": "+",
"strand": 1,
},
{"type": "MultiplePossibleGenesElement"},
],
Expand Down Expand Up @@ -668,7 +668,7 @@ def fusion_example():
},
},
},
"strand": "+",
"strand": 1,
},
{"type": "MultiplePossibleGenesElement"},
],
Expand Down
23 changes: 14 additions & 9 deletions tests/test_fusor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import copy

import pytest
from cool_seq_tool.schemas import Strand
from ga4gh.vrsatile.pydantic.vrsatile_models import GeneDescriptor, LocationDescriptor

from fusor.exceptions import FUSORParametersException
Expand Down Expand Up @@ -186,7 +187,7 @@ def templated_sequence_element():
},
},
},
"strand": "+",
"strand": 1,
}
return TemplatedSequenceElement(**params)

Expand All @@ -209,7 +210,7 @@ def templated_sequence_element_ensg():
},
},
},
"strand": "-",
"strand": -1,
}
return TemplatedSequenceElement(**params)

Expand All @@ -234,7 +235,7 @@ def templated_sequence_element_custom_id():
},
},
},
"strand": "+",
"strand": 1,
}
return TemplatedSequenceElement(**params)

Expand Down Expand Up @@ -808,12 +809,12 @@ def test_templated_sequence_element(
):
"""Test that templated sequence element works correctly"""
tsg = fusor_instance.templated_sequence_element(
100, 150, "NC_000001.11", "+", residue_mode="residue"
100, 150, "NC_000001.11", Strand.POSITIVE, residue_mode="residue"
)
assert tsg.model_dump() == templated_sequence_element.model_dump()

tsg = fusor_instance.templated_sequence_element(
99, 150, "NC_000001.11", "+", residue_mode="inter-residue"
99, 150, "NC_000001.11", Strand.POSITIVE, residue_mode="inter-residue"
)
assert tsg.model_dump() == templated_sequence_element.model_dump()

Expand All @@ -826,22 +827,26 @@ def test_templated_sequence_element(
100,
150,
"NC_000001.11",
"+",
Strand.POSITIVE,
add_location_id=True,
seq_id_target_namespace="ga4gh",
)
assert tsg.model_dump() == expected

tsg = fusor_instance.templated_sequence_element(
140719329, 140719400, "ENSG00000157764", "-"
140719329, 140719400, "ENSG00000157764", Strand.NEGATIVE
)
assert tsg.model_dump() == templated_sequence_element_ensg.model_dump()

# test untranslatable sequence ID
# adds "ensembl" namespace but unable to translate to ga4gh digest ID
expected = copy.deepcopy(templated_sequence_element_ensg.model_dump())
tsg = fusor_instance.templated_sequence_element(
140719329, 140719400, "ENSG00000157764", "-", seq_id_target_namespace="ga4gh"
140719329,
140719400,
"ENSG00000157764",
Strand.NEGATIVE,
seq_id_target_namespace="ga4gh",
)
assert tsg.model_dump() == expected

Expand All @@ -852,7 +857,7 @@ def test_templated_sequence_element(
200,
300,
"custom_ID__1",
"+",
Strand.POSITIVE,
residue_mode="inter-residue",
seq_id_target_namespace="ga4gh",
)
Expand Down
9 changes: 5 additions & 4 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import copy

import pytest
from cool_seq_tool.schemas import Strand
from pydantic import ValidationError

from fusor.models import (
Expand Down Expand Up @@ -231,12 +232,12 @@ def templated_sequence_elements(location_descriptors):
return [
{
"type": "TemplatedSequenceElement",
"strand": "+",
"strand": 1,
"region": location_descriptors[5],
},
{
"type": "TemplatedSequenceElement",
"strand": "-",
"strand": -1,
"region": location_descriptors[4],
},
]
Expand Down Expand Up @@ -552,7 +553,7 @@ def assert_genomic_region_test_element(test):
expected values.
"""
assert test.type == "TemplatedSequenceElement"
assert test.strand.value == "+"
assert test.strand == Strand.POSITIVE
assert test.region.id == "chr12:p12.1-p12.2"
assert test.region.type == "LocationDescriptor"
assert test.region.location.species_id == "taxonomy:9606"
Expand Down Expand Up @@ -585,7 +586,7 @@ def assert_genomic_region_test_element(test):
# test enum validation
with pytest.raises(ValidationError) as exc_info:
assert TemplatedSequenceElement(
type="GeneElement", region=location_descriptors[0], strand="+"
type="GeneElement", region=location_descriptors[0], strand=Strand.POSITIVE
)
msg = "Input should be <FUSORTypes.TEMPLATED_SEQUENCE_ELEMENT: 'TemplatedSequenceElement'>"
check_validation_error(exc_info, msg)
Expand Down