From b623aa51b0f8e7edaaa755e81bdddf277b022e44 Mon Sep 17 00:00:00 2001 From: "X.Liu" <34545147+XLIU-hub@users.noreply.github.com> Date: Tue, 30 Apr 2024 13:26:34 +0200 Subject: [PATCH] Include Ensembl Tark API for reference retrieval (#21) * Retrieve transcript info with version number using Tark * Update retrieval order * Add Tark api in configuration * Update CLI * Implement retrieve sequence and annotation for model type * Cleaning * Add tests --------- Authored-by: Xiaoyun Liu --- mutalyzer_retriever/cli.py | 2 +- mutalyzer_retriever/configuration.py | 1 + mutalyzer_retriever/parser.py | 4 +- mutalyzer_retriever/parsers/json_ensembl.py | 159 +++++ mutalyzer_retriever/retriever.py | 35 +- mutalyzer_retriever/sources/ensembl.py | 131 ++-- tests/__init__.py | 0 tests/commons.py | 149 +++++ .../ENST00000000000.5.tark_raw.model.json | 1 + tests/data/ENST00000304494.10.sequence | 1 + tests/data/ENST00000304494.10.tark.model.json | 133 ++++ tests/data/ENST00000304494.10.tark_raw.json | 1 + .../ENST00000304494.5.rest_raw.model.json | 1 + tests/data/ENST00000304494.5.tark.model.json | 1 + .../ENST00000304494.5.tark_raw.model.json | 113 ++++ .../ENST00000304494.7.tark_raw.model.json | 113 ++++ tests/data/ENST00000304494.rest_raw.json | 1 + tests/data/ENST00000304494.sequence | 1 + tests/data/ENST00000304494.tark.model.json | 133 ++++ tests/data/ENST00000304494.tark_raw.json | 578 ++++++++++++++++++ tests/data/ENST00000383925.1.sequence | 1 + tests/data/ENST00000383925.1.tark.model.json | 85 +++ tests/data/ENST00000383925.1.tark_raw.json | 75 +++ tests/data/ENST00000383925.sequence | 1 + tests/data/ENST00000383925.tark.model.json | 85 +++ tests/data/ENST00000383925.tark_raw.json | 1 + tests/test_fetch.py | 89 ++- tests/test_model_validation.py | 108 ++-- tests/test_retriever_model.py | 87 +-- 29 files changed, 1838 insertions(+), 252 deletions(-) create mode 100644 mutalyzer_retriever/parsers/json_ensembl.py create mode 100644 tests/__init__.py create mode 100644 tests/commons.py create mode 100644 tests/data/ENST00000000000.5.tark_raw.model.json create mode 100644 tests/data/ENST00000304494.10.sequence create mode 100644 tests/data/ENST00000304494.10.tark.model.json create mode 100644 tests/data/ENST00000304494.10.tark_raw.json create mode 100644 tests/data/ENST00000304494.5.rest_raw.model.json create mode 100644 tests/data/ENST00000304494.5.tark.model.json create mode 100644 tests/data/ENST00000304494.5.tark_raw.model.json create mode 100644 tests/data/ENST00000304494.7.tark_raw.model.json create mode 100644 tests/data/ENST00000304494.rest_raw.json create mode 100644 tests/data/ENST00000304494.sequence create mode 100644 tests/data/ENST00000304494.tark.model.json create mode 100644 tests/data/ENST00000304494.tark_raw.json create mode 100644 tests/data/ENST00000383925.1.sequence create mode 100644 tests/data/ENST00000383925.1.tark.model.json create mode 100644 tests/data/ENST00000383925.1.tark_raw.json create mode 100644 tests/data/ENST00000383925.sequence create mode 100644 tests/data/ENST00000383925.tark.model.json create mode 100644 tests/data/ENST00000383925.tark_raw.json diff --git a/mutalyzer_retriever/cli.py b/mutalyzer_retriever/cli.py index e078ea4..d74836d 100644 --- a/mutalyzer_retriever/cli.py +++ b/mutalyzer_retriever/cli.py @@ -27,7 +27,7 @@ def _parse_args(args): parser.add_argument("--id", help="the reference id") parser.add_argument( - "-s", "--source", help="retrieval source", choices=["ncbi", "ensembl", "lrg"] + "-s", "--source", help="retrieval source", choices=["ncbi", "ensembl", "ensembl_tark", "ensembl_rest", "lrg"] ) parser.add_argument( diff --git a/mutalyzer_retriever/configuration.py b/mutalyzer_retriever/configuration.py index bb7fe23..aac52bc 100644 --- a/mutalyzer_retriever/configuration.py +++ b/mutalyzer_retriever/configuration.py @@ -10,6 +10,7 @@ "MAX_FILE_SIZE": 10 * 1048576, "ENSEMBL_API": "https://rest.ensembl.org", "ENSEMBL_API_GRCH37": "https://grch37.rest.ensembl.org", + "ENSEMBL_TARK_API":"https://tark.ensembl.org/api", } diff --git a/mutalyzer_retriever/parser.py b/mutalyzer_retriever/parser.py index 38ae986..4919a67 100644 --- a/mutalyzer_retriever/parser.py +++ b/mutalyzer_retriever/parser.py @@ -1,4 +1,4 @@ -from .parsers import fasta, gff3, lrg +from .parsers import fasta, gff3, json_ensembl, lrg def _get_reference_type(content): @@ -19,6 +19,8 @@ def parse(reference_content, reference_type=None, reference_source=None): model = gff3.parse(reference_content, reference_source) elif reference_type == "fasta": model = fasta.parse(reference_content) + elif reference_type == "json": + model = json_ensembl.parse(reference_content) else: return None diff --git a/mutalyzer_retriever/parsers/json_ensembl.py b/mutalyzer_retriever/parsers/json_ensembl.py new file mode 100644 index 0000000..1b6821c --- /dev/null +++ b/mutalyzer_retriever/parsers/json_ensembl.py @@ -0,0 +1,159 @@ +import requests +from ..util import make_location, f_e + + +def _feature(raw_dict): + """Convert a general tark sub-dictionary into our internal model. + - only id and location info; + - Tark locations are 1-based, our model is 0-based. + """ + return { + "id": raw_dict["stable_id"], + "location": make_location( + raw_dict["loc_start"] - 1, raw_dict["loc_end"], raw_dict.get("loc_strand") + ), + } + + +def _annotations(ref_id, location, features): + return { + "id": ref_id, + "type": "record", + "location": location, + "features": features, + } + + +def _exons(tark_exons): + """Convert exons info from tark response list into internal exon list.""" + exons = [] + for tark_exon in tark_exons: + exon = _feature(tark_exon) + exon["type"] = "exon" + exons.append(exon) + return exons + + +def _translation(tark_translations): + """Convert translations per transcript from tark list into internal translation list. + - null for non-coding RNA in input, return an empty list; + - one value for coding RNA in input, return a list of one item; + - rarely multiple values for coding RNA in input with different versions, + return a list of multiple items. + """ + translations = [] + for tark_translation in tark_translations: + translation = _feature(tark_translation) + translation["type"] = "CDS" + translations.append(translation) + return translations + + +def _transcript(tark_transcript, exon_features, translation_feature): + """Convert transcript from tark list into internal transcript list. + - Tark has RNA type as protein_coding, change to internal RNA type mRNA. + """ + transcript = {} + transcript = _feature(tark_transcript) + transcript["type"] = tark_transcript["biotype"] + if transcript["type"] == "protein_coding": + transcript["type"] = "mRNA" + transcript["qualifiers"] = { + "assembly_name": tark_transcript["assembly"], + "version": str(tark_transcript["stable_id_version"]), + "tag": "basic", + } + transcript["features"] = exon_features + translation_feature + return [transcript] + + +def _gene(tark_gene, gene_feature): + """Convert gene info from tark list into internal gene list.""" + gene = {} + gene = _feature(tark_gene) + gene["type"] = "gene" + gene["qualifiers"] = { + "assembly_name": tark_gene["assembly"], + "version": str(tark_gene["stable_id_version"]), + "name": tark_gene["name"], + } + gene["features"] = gene_feature + return [gene] + + +def _seq_from_rest(assembly, chr_idx, strand, loc_start, loc_end, timeout=1): + """Retrieve sequence from ensembl Rest API.""" + if assembly == "GRCh38": + server = "https://rest.ensembl.org" + elif assembly == "GRCh37": + server = "https://grch37.rest.ensembl.org" + else: + raise NameError("Unsupported assembly {assembly}") + ext = f"/sequence/region/human/{chr_idx}:{loc_start}..{loc_end}:{strand}?" + r = requests.get( + server + ext, headers={"Content-Type": "text/plain"}, timeout=timeout + ) + if not r.ok: + raise NameError + return r.text + + +def _sequence(tark_result): + return { + "seq": _seq_from_rest( + tark_result["assembly"], + tark_result["loc_region"], + tark_result["loc_strand"], + tark_result["loc_start"], + tark_result["loc_end"], + ), + "description": " ".join( + [ + f"{tark_result['stable_id']}.{str(tark_result['stable_id_version'])}", + ":".join( + [ + "chromosome", + tark_result["assembly"], + str(tark_result["loc_region"]), + str(tark_result["loc_start"]), + str(tark_result["loc_end"]), + str(tark_result["loc_strand"]), + ] + ), + ] + ), + } + + +def parse(tark_results): + """Convert the Tark json response into the retriever model json output. + - take the latest version from Tark response if no specific version required; + - for genes, take the latest version with "name" field in case of same stable ID + """ + tark_results = tark_results.get("results") + if tark_results: + tark_result = tark_results[-1] + else: + raise NameError(f_e("ensembl tark", e=None, extra="returns no results")) + + exon_features = _exons(tark_result["exons"]) + + translation_features = _translation(tark_result["translations"]) + + transcript_features = _transcript(tark_result, exon_features, translation_features) + + genes = sorted( + tark_result["genes"], + key=lambda g: (g["stable_id_version"], 0 if g["name"] is None else 1), + ) + tark_gene = genes[-1] + gene_feature = _gene(tark_gene, gene_feature=transcript_features) + + return { + "annotations": _annotations( + tark_result["loc_region"], + make_location(tark_result["loc_start"] - 1, tark_result["loc_end"]), + gene_feature, + ), + "sequence": _sequence(tark_result), + } diff --git a/mutalyzer_retriever/retriever.py b/mutalyzer_retriever/retriever.py index e3fb215..d638c4e 100644 --- a/mutalyzer_retriever/retriever.py +++ b/mutalyzer_retriever/retriever.py @@ -18,7 +18,7 @@ class NoReferenceError(Exception): def __init__(self, status, uncertain_sources): self.uncertain_sources = uncertain_sources message = "" - if uncertain_sources is not []: + if uncertain_sources != []: message = f"\n\nUncertain sources: {', '.join(uncertain_sources)}\n" for source in status.keys(): @@ -50,12 +50,14 @@ def _raise_error(status): and isinstance(status[source]["errors"][0], NameError) ): uncertain_sources.append(source) - if uncertain_sources is []: + if uncertain_sources == []: raise NoReferenceRetrieved raise NoReferenceError(status, uncertain_sources) -def _fetch_unknown_source(reference_id, reference_type, size_off=True, timeout=1): +def _fetch_unknown_source( + reference_id, reference_type, reference_source, size_off=True, timeout=1 +): status = {"lrg": {"errors": []}, "ncbi": {"errors": []}, "ensembl": {"errors": []}} @@ -69,9 +71,7 @@ def _fetch_unknown_source(reference_id, reference_type, size_off=True, timeout=1 return reference_content, "lrg", "lrg" else: status["lrg"]["errors"].append( - ValueError( - "Lrg fetch does not support '{}' reference type.".format(reference_type) - ) + ValueError(f"Lrg fetch does not support '{reference_type}' reference type.") ) # NCBI @@ -89,7 +89,7 @@ def _fetch_unknown_source(reference_id, reference_type, size_off=True, timeout=1 # Ensembl try: reference_content, reference_type = ensembl.fetch( - reference_id, reference_type, timeout + reference_id, reference_type, reference_source, timeout ) except (NameError, ConnectionError, ValueError) as e: status["ensembl"]["errors"].append(e) @@ -122,21 +122,20 @@ def retrieve_raw( if reference_source is None: reference_content, reference_type, reference_source = _fetch_unknown_source( - reference_id, reference_type, size_off, timeout + reference_id, reference_type, reference_source, size_off, timeout ) elif reference_source == "ncbi": reference_content, reference_type = ncbi.fetch( reference_id, reference_type, timeout ) - elif reference_source == "ensembl": + elif reference_source in ["ensembl", "ensembl_tark", "ensembl_rest"]: reference_content, reference_type = ensembl.fetch( - reference_id, reference_type, timeout + reference_id, reference_type, reference_source, timeout ) elif reference_source == "lrg": reference_content = lrg.fetch_lrg(reference_id, timeout=timeout) if reference_content: reference_type = "lrg" - return reference_content, reference_type, reference_source @@ -167,9 +166,9 @@ def retrieve_model( model = parser.parse(reference_content, reference_type, reference_source) if model_type == "all": return model - elif model_type == "sequence": + if model_type == "sequence": return model["sequence"] - elif model_type == "annotations": + if model_type == "annotations": return model["annotations"] elif reference_type == "gff3": if model_type == "all": @@ -195,6 +194,16 @@ def retrieve_model( "sequence": parser.parse(reference_content, "fasta"), } + elif reference_type == "json": + if "ensembl" in reference_source: + json_model = parser.parse(reference_content, "json") + if model_type == "all": + return json_model + elif model_type == "annotations": + return json_model["annotations"] + elif model_type == "sequence": + return json_model["sequence"]["seq"] + def retrieve_model_from_file(paths=[], is_lrg=False): """ diff --git a/mutalyzer_retriever/sources/ensembl.py b/mutalyzer_retriever/sources/ensembl.py index 61e796a..7efa883 100644 --- a/mutalyzer_retriever/sources/ensembl.py +++ b/mutalyzer_retriever/sources/ensembl.py @@ -1,30 +1,12 @@ import json +import requests + from ..configuration import settings from ..request import Http400, RequestErrors, request from ..util import f_e -def fetch_json(feature_id, api_base, timeout=1): - url = f"{api_base}/lookup/id/{feature_id}" - params = {"feature": ["gene", "transcript", "cds"], "expand": 1} - headers = {"Content-Type": "application/json"} - try: - response = request(url, params, headers, timeout=timeout) - except RequestErrors as e: - raise ConnectionError(f"(json) {str(e)}") - except Http400 as e: - response_json = e.response.json() - if response_json and response_json.get("error") == "ID '{}' not found".format( - feature_id - ): - raise NameError(f"(json) {str(e)}") - else: - raise e - else: - return response - - def fetch_fasta(feature_id, api_base, timeout=1): url = f"{api_base}/sequence/id/{feature_id}" params = {"format": "fasta", "type": "genomic"} @@ -36,14 +18,10 @@ def fetch_fasta(feature_id, api_base, timeout=1): raise ConnectionError(f_e("gff3", e)) except Http400 as e: response_json = e.response.json() - if response_json and response_json.get("error") == "ID '{}' not found".format( - feature_id - ): + if response_json and response_json.get("error") == f"ID '{feature_id}' not found": raise NameError(f_e("fasta", e, response_json.get("error"))) - else: - raise e - else: - return response + raise e + return response def fetch_gff3(feature_id, api_base, timeout=1): @@ -57,14 +35,28 @@ def fetch_gff3(feature_id, api_base, timeout=1): raise ConnectionError(f_e("gff3", e)) except Http400 as e: response_json = e.response.json() - if response_json and response_json.get("error") == "ID '{}' not found".format( - feature_id - ): + if response_json and response_json.get("error") == f"ID '{feature_id}' not found": raise NameError(f_e("gff3", e, response_json.get("error"))) - else: - raise e - else: - return response + raise e + return response + + +def _get_tark_versions(reference_id, api_base, timeout=1): + endpoint = "transcript" + params = {"stable_id": reference_id} + tark_req = json.loads( + request(url=f"{api_base}/{endpoint}", params=params, timeout=timeout) + ) + tark_versions_38 = [] + tark_versions_37 = [] + if tark_req["results"]: + for r in tark_req["results"]: + if r["assembly"] == "GRCh37": + tark_versions_37.append(int(r["stable_id_version"])) + elif r["assembly"] == "GRCh38": + tark_versions_38.append(int(r["stable_id_version"])) + + return tark_versions_38, tark_versions_37 def _get_most_recent_version(reference_id, api_base, timeout=1): @@ -93,38 +85,67 @@ def _get_id_and_version(reference_id): return r_id, r_version -def _in_grch37(r_id, r_version, r_info, timeout): - api_base = settings.get("ENSEMBL_API_GRCH37") - if r_info["species"] == "homo_sapiens" and int(r_info["version"]) > r_version: - grch37_version = _get_most_recent_version(r_id, api_base, timeout) - if grch37_version and grch37_version == r_version: - return True - return False +def fetch_json(reference_id, reference_version, api_base, assembly="GRCh38", timeout=1): + endpoint = "transcript" + params = { + "stable_id": reference_id, + "assembly_name": assembly, + "stable_id_version": reference_version, + "expand": "translations, genes, exons", + } + req = requests.request( + method="get", url=f"{api_base}/{endpoint}", params=params, timeout=timeout + ) + return req.json() -def fetch(reference_id, reference_type=None, timeout=1): - api_base = settings.get("ENSEMBL_API") - r_id, r_version = _get_id_and_version(reference_id) +def get_rest_api_base(r_id, r_version): + rest_version_38 = _get_most_recent_version(r_id, settings.get("ENSEMBL_API")) + if r_version in [None, rest_version_38]: + return settings.get("ENSEMBL_API"), "GRCh38" + if r_version == _get_most_recent_version(r_id, settings.get("ENSEMBL_API_GRCH37")): + return settings.get("ENSEMBL_API_GRCH37"), "GRCh37" + raise NameError(f"Cannot fetch {r_id}.{r_version} from Ensembl REST") + +def get_transcript_api_base(r_id, r_version, r_source): + if r_source == "ensembl_rest": + return get_rest_api_base(r_id, r_version) + + tark_versions_38, tark_versions_37 = _get_tark_versions(r_id, settings.get("ENSEMBL_TARK_API")) + if r_version is None or r_version in tark_versions_38: + return settings.get("ENSEMBL_TARK_API"), "GRCh38" + if r_version in tark_versions_37: + return settings.get("ENSEMBL_TARK_API"), "GRCh37" + raise NameError(f"Cannot fetch {r_id} from Ensembl Tark") + + +def fetch(reference_id, reference_type=None, reference_source=None, timeout=1): + r_id, r_version = _get_id_and_version(reference_id) if r_id is None: raise NameError - elif r_version is not None: - r_info = _get_reference_information(r_id, api_base, timeout) - if int(r_info["version"]) > r_version: - if _in_grch37(r_id, r_version, r_info, timeout): - api_base = settings.get("ENSEMBL_API_GRCH37") - else: - raise NameError - - if reference_type in [None, "gff3"]: + + if "ENST" in r_id: + api_base, assembly = get_transcript_api_base(r_id, r_version, reference_source) + else: + api_base, assembly = get_rest_api_base(r_id, r_version) + + if reference_type is None: + try: + return fetch_gff3(r_id, api_base, timeout), "gff3" + except ConnectionError: + return fetch_json(r_id, r_version, api_base, assembly, timeout), "json" + elif reference_type == "gff3": return fetch_gff3(r_id, api_base, timeout), "gff3" elif reference_type == "fasta": return fetch_fasta(r_id, api_base, timeout), "fasta" elif reference_type == "json": - return fetch_json(r_id, api_base, timeout), "json" + if reference_source in [None, "ensembl", "ensembl_tark"]: + return fetch_json(r_id, r_version, api_base, assembly, timeout), "json" + elif reference_type == "genbank": return None, "genbank" raise ValueError( - "Ensembl fetch does not support '{}' reference type.".format(reference_type) + f"{reference_source} fetch does not support {reference_type} reference type." ) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/commons.py b/tests/commons.py new file mode 100644 index 0000000..ce24204 --- /dev/null +++ b/tests/commons.py @@ -0,0 +1,149 @@ +import json +from pathlib import Path + +import pytest + +from mutalyzer_retriever.configuration import settings +from mutalyzer_retriever.retriever import NoReferenceError + +API_BASE = settings["ENSEMBL_API"] +API_BASE_GRCH37 = settings["ENSEMBL_API_GRCH37"] +TARK_API_BASE = settings["ENSEMBL_TARK_API"] + + +API_BASE_MAP = { + "ENSG00000147889": {"version": 18, "species": "homo_sapiens"}, + "ENSMUSG00000022346": {"version": 18, "species": "mus_musculus"}, + "ENST00000304494": {"version": 10, "species": "homo_sapiens"}, + "ENST00000000000": {"version": 20, "species": "homo_sapiens"}, +} +API_BASE_GRCH37_MAP = { + "ENSG00000147889": {"version": 12, "species": "homo_sapiens"}, + "ENST00000304494": {"version": 5, "species": "homo_sapiens"}, + "ENST00000000000": {"version": 6, "species": "homo_sapiens"}, +} + +TARK_API_BASE_MAP = { + "ENST00000304494": {"GRCH38_version": [10, 9, 8, 7, 6], "GRCH37_version": [5]}, + "ENST00000000000": {"GRCH38_version": [20, 19, 18], "GRCH37_version": [6, 5]}, +} + + +@pytest.fixture(autouse=True) +def patch_retriever(monkeypatch): + """retrieve all monkeypath""" + monkeypatch.setattr("mutalyzer_retriever.sources.ensembl.fetch_gff3", _fetch_gff3) + monkeypatch.setattr( + "mutalyzer_retriever.sources.ensembl._get_reference_information", + _get_reference_information, + ) + monkeypatch.setattr( + "mutalyzer_retriever.sources.ensembl._get_tark_versions", _get_tark_versions + ) + monkeypatch.setattr("mutalyzer_retriever.sources.ensembl.fetch_json", _fetch_json) + monkeypatch.setattr("mutalyzer_retriever.retriever.retrieve_raw", _retrieve_raw) + + +def _fetch_json(r_id, r_version, api_base, assembly, timeout): + if api_base == TARK_API_BASE: + return _get_content(f"data/{r_id}.{r_version}.tark_raw.model.json") + + +def _get_tark_versions(r_id, api_base, timeout=1): + if api_base == TARK_API_BASE and r_id in TARK_API_BASE_MAP: + return ( + TARK_API_BASE_MAP[r_id]["GRCH38_version"], + TARK_API_BASE_MAP[r_id]["GRCH37_version"], + ) + + +def _fetch_gff3(feature_id, api_base, timeout=1): + if api_base == API_BASE_GRCH37: + return _get_content( + f"data/{feature_id}.{API_BASE_GRCH37_MAP[feature_id]['version']}.gff3" + ) + return _get_content(f"data/{feature_id}.gff3") + + +def _get_reference_information(r_id, api_base, timeout=1): + if api_base == API_BASE and r_id in API_BASE_MAP: + return API_BASE_MAP[r_id] + if api_base == API_BASE_GRCH37 and r_id in API_BASE_GRCH37_MAP: + return API_BASE_GRCH37_MAP[r_id] + + +def _get_content(relative_location): + data_file = Path(__file__).parent.joinpath(relative_location) + try: + with open(str(data_file), "r") as file: + content = file.read() + except FileNotFoundError as exc: + raise NoReferenceError({}, []) from exc + return content + + +def _retrieve_raw( + r_id, + r_source=None, + r_type=None, + size_off=True, + configuration_path=None, + timeout=1, +): + if r_type == "fasta": + return _get_content("data/" + r_id + ".fasta"), "fasta", "ncbi" + elif r_id.startswith("LRG_"): + return _get_content("data/" + r_id + ".lrg"), "lrg", "lrg" + elif r_type == "json": + return ( + json.loads(_get_content("data/" + r_id + ".tark_raw.json")), + "json", + "ensembl_tark", + ) + else: + return _get_content("data/" + r_id + ".gff3"), "gff3", "ncbi" + + +references = { + "ncbi": { + "gff3": [ + "NM_078467.2", + "NM_152263.2", + "NM_152263.3", + "NM_000077.4", + "NM_002001.2", + "NG_012337.1", + "NR_002196.2", + "L41870.1", + "NG_007485.1", + "NC_012920.1", + "NG_009930.1", + "AA010203.1", + "NP_060665.3", + "D64137.1", + "AB006684.1", + "NM_004152.3", + "7", + "M65131.1", + "XR_948219.2", + "NR_023343.1", + ] + }, + "ensembl_rest": { + "gff3": [ + "ENSG00000147889", + "ENST00000383925", + "ENST00000304494", + "ENSG00000198899", + ] + }, + "ensembl_tark": { + "json": [ + "ENST00000383925.1", + "ENST00000383925", + "ENST00000304494", + "ENST00000304494.10", + ] + }, + "lrg": {"lrg": ["LRG_11", "LRG_417", "LRG_857"]}, +} diff --git a/tests/data/ENST00000000000.5.tark_raw.model.json b/tests/data/ENST00000000000.5.tark_raw.model.json new file mode 100644 index 0000000..167ce23 --- /dev/null +++ b/tests/data/ENST00000000000.5.tark_raw.model.json @@ -0,0 +1 @@ +{"annotations": {"id": "9", "type": "record", "location": {"type": "range", "start": {"type": "point", "position": 21967751}, "end": {"type": "point", "position": 21974857}}, "features": [{"id": "ENSG00000147889", "type": "gene", "location": {"type": "range", "start": {"type": "point", "position": 21967751}, "end": {"type": "point", "position": 21995301}, "strand": -1}, "qualifiers": {"assembly_name": "GRCh38", "version": "18", "name": "CDKN2A"}, "features": [{"id": "ENST00000304494", "type": "mRNA", "location": {"type": "range", "start": {"type": "point", "position": 21967751}, "end": {"type": "point", "position": 21974857}, "strand": -1}, "qualifiers": {"assembly_name": "GRCh38", "version": "10", "tag": "basic"}, "features": [{"id": "ENSE00001833804", "type": "exon", "location": {"type": "range", "start": {"type": "point", "position": 21974677}, "end": {"type": "point", "position": 21974857}, "strand": -1}}, {"id": "ENSE00003496053", "type": "exon", "location": {"type": "range", "start": {"type": "point", "position": 21970901}, "end": {"type": "point", "position": 21971208}, "strand": -1}}, {"id": "ENSE00003529527", "type": "exon", "location": {"type": "range", "start": {"type": "point", "position": 21967751}, "end": {"type": "point", "position": 21968242}, "strand": -1}}, {"id": "ENSP00000307101", "type": "CDS", "location": {"type": "range", "start": {"type": "point", "position": 21968228}, "end": {"type": "point", "position": 21974827}, "strand": -1}}]}]}]}, "sequence": {"seq": "GAGAGCAGGCAGCGGGCGGCGGGGAGCAGCATGGAGCCGGCGGCGGGGAGCAGCATGGAGCCTTCGGCTGACTGGCTGGCCACGGCCGCGGCCCGGGGTCGGGTAGAGGAGGTGCGGGCGCTGCTGGAGGCGGGGGCGCTGCCCAACGCACCGAATAGTTACGGTCGGAGGCCGATCCAGGTGGGTAGAGGGTCTGCAGCGGGAGCAGGGGATGGCGGGCGACTCTGGAGGACGAAGTTTGCAGGGGAATTGGAATCAGGTAGCGCTTCGATTCTCCGGAAAAAGGGGAGGCTTCCTGGGGAGTTTTCAGAAGGGGTTTGTAATCACAGACCTCCTCCTGGCGACGCCCTGGGGGCTTGGGAAGCCAAGGAAGAGGAATGAGGAGCCACGCGCGTACAGATCTCTCGAATGCTGAGAAGATCTGAAGGGGGGAACATATTTGTATTAGATGGAAGTATGCTCTTTATCAGATACAAAATTTACGAACGTTTGGGATAAAAAGGGAGTCTTAAAGAAATGTAAGATGTGCTGGGACTACTTAGCCTCCAATTCACAGATACCTGGATGGAGCTTATCTTTCTTACTAGGAGGGATTATCAGTGGAAATCTGTGGTGTATGTTGGAATAAATATCGAATATAAATTTTGATCGAAATTATTCAGAAGCGGCCGGGCGCGGTGCCTCACGCCTTGTAATCCCTTCACTTTGGGAGATCAAGGCGGGGGGAATCACCTGAGGTCGGGAGTTCGAGACCAGCCTGGCCAACAGGTGAAACCTCGCCTCTACTAAAAATACAAAAAGTAGCCGGGGGTGGTGGCAGGCGCCTGTAATCCCAGCTACTCGGGAGGTTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCTGAGGTTGTAGTGAACAGCGAGATGGAGCCACTTCACTCCAGCCTGGGTGACAGAGTGAGACTTTGTCGAAAGAAAGAAAGAGAGAAAGAGAGAGAGAAAAATTATTCAGAAGCAACTACATATTGTGTTTATTTTTAACTGAGTAGGGCAAATAAATATATGTTTGCTGTAGGAACTTAGGAAATAATGAGCCACATTCATGTGATCATTCCAGAGGTAATATGTAGTTACCATTTTGGGAATATCTGCTAACATTTTTGCTCTTTTACTATCTTTAGCTTACTTGATATAGTTTATTTGTGATAAGAGTTTTCAATTCCTCATTTTTGAACAGAGGTGTTTCTCCTCTCCCTACTCCTGTTTTGTGAGGGAGTTAGGGGAGGATTTAAAAGTAATTAATACATGGGTAACTTAGCATCTCTAAAATTTTGCCAACAGCTTGAACCCGGGAGTTTGGCTTTGTAGTCCTACAATATCTTAGAAGAGACCTTATTTGTTTAAAAACAAAAAGGAAAAAGAAAAGTGGATAGTTTTGACAATTTTTAATGGAGACGGGAGAAGAACATGTAGAAAAGGGGAAATGATGTTGGCTTAGAATCCTAACTACATTGGTGTTTAATATAGGAACATTTATTTATATAACATTTTAAAGTACTAAATTCATATTAGTATATTATCAAATGGATATATTATCAAATGGGTTTAAGCATCCTACACATTTTAATTCAATTGATTCATTTTCTTTTTGCTTTGGATTTCTATCATGATTTAAATATTTACATATGGGTTACTTTTTAGATTTTTCATACTATGAAATATAAGAAAAACCTTTAAGGCTAGTTTTATGACCAAGACGAAGGACTTCATTGAATACACAAAACAATAAATATACTGCAACATTTTGTCTTTCTTTTTGTAGCTGCAATTTGGTTTGCTTATACTTTCTCTTTGTCTCTTTGAAAACTGAGTCAGTTTCACTTTCTCAGGACAGGATTTAATAACCATAATATAATTTAGTATAATTCCTTGATTTAGGCAAATTATGCAATTTGTGTTTAGTATGAAATGTACCTAAAAATAAGTAACTCCTCTTTAACACCACCATCCTCAAACTAATATAACAAATAACAGTTATCCTAAAATAAATTGTCTACTTCCACCATGCAGCACTCAAATTTTAAGGTTGCTATGACTGCAGACAGTATTTTAAAATTCCTCTCTGGAAATGGCTTTGTTTCCAAGATGATTTAGGAACCAAAGAGGTGACCATCTCTTGTTTAATGAACTCTCAAATCATAAACCTGGGAAGTGTTTTAGTTTCCTACTGCTGCTGTTACAAATTATCACAAATGTGTTAGCTAAAACAAACACAAAATTATTATTTTACAGTTCTAGAGATCAGAAGTCAAAAATGGGTCCACAAGGTTTCATTCCTTTTGGAAACTCTAAGGGGCAATCTGTTTCCTTGTCTTTTCCAGCTTCTAGTGACCATCAAATTCCTTGGCTCATGGTCTCTGTATTTTCTCTGTGGCCTGTGCTTCCATTCTTGTATCTTCTCTCTGACTGTGACCCTCTAATAAAAACACTTGGGGTTATGTTGGGCCCACCCTGAAAATTCTGGATAATCTCCCTCAAGACCATTAATTAAATCACATCTGCAAAGCCTCTTTTGCCACATAAGTTAATGTATTAAAAGTTTTTGAGGATTAGGACATAGACATTGGGGGTGGGGGGGCATTATTCAGCCTACCACAGGAAGGAATTTTAGGGTTAATTAAACTAGCCTTCTTATTTTATACTTGAAGAAATTGAAGTTTTGGAATTGGAGAGCATTATGCTAAATGAAATAAGCCAAACACAGAAAGACAAATATCACATGTTCTCACTTATCTGTGAAATATAAAACAATTACATTCTTAGCAGTAAAGAGTAGAATGGTGGTTACTAGAGCTGGGGGGTGGGAGGAATGGGGAGATGGTAATCAAGATATAAAGCCTCAGTTAAGATGGGAGGAATAAGTTTGATTGTTTTTTTTGAGATGTGTTTCATAGCATGATGAATATAGCTAAATAGTAAATCCCAAATGCTCTCATTTGACAAAAATGTCAAATATTTGAGATGATGGATAGGTTACTTAGCTTGACTTAATAATTCCCCATTGTGTTCAAAGATCATAACTTCATATTGTACCACATAAATATATACAACTGTACTATCCCAATATATAATTTTAAAACTAATATAATGAAAAAGAAATTGAAGTTCAACATTCCCAGAAGCTAAGTGTAACTTAAAAGTTTTGTGAGAATTTGTTTTAACAAACAAACAAGTTTTCTCTTTTTAACAATTACCACATTCTGCGCTTGGATATACAGCAGTGAACAAAAAAAAAAAAAAAAATCTCCAGGCCTAACATAATTTCAGGAAGAAATTTCAGTAGTTGTATCTCAGGGGAAATACAGGAAGTTAGCCTGGAGTAAAAGTCAGTCTGTCCCTGCCCCTTTGCTATTTTGCCCGTGCCTCACAGTGCTCTCTGCCTGTGACGACAGCTCCGCAGAAGTTCGGAGGATATAATGGAATTCATTGTGTACTGAAGAATGGATAGAGAACTCAAGAAGGAAATTGGAAACTGGAAGCAAATGTAGGGGTAATTAGACACCTGGGGCTTGTGTGGGGGTCTGCTTGGCGGTGAGGGGGCTCTACACAAGCTTCCTTTCCGTCATGCCGGCCCCCACCCTGGCTCTGACCATTCTGTTCTCTCTGGCAGGTCATGATGATGGGCAGCGCCCGAGTGGCGGAGCTGCTGCTGCTCCACGGCGCGGAGCCCAACTGCGCCGACCCCGCCACTCTCACCCGACCCGTGCACGACGCTGCCCGGGAGGGCTTCCTGGACACGCTGGTGGTGCTGCACCGGGCCGGGGCGCGGCTGGACGTGCGCGATGCCTGGGGCCGTCTGCCCGTGGACCTGGCTGAGGAGCTGGGCCATCGCGATGTCGCACGGTACCTGCGCGCGGCTGCGGGGGGCACCAGAGGCAGTAACCATGCCCGCATAGATGCCGCGGAAGGTCCCTCAGGTGAGGACTGATGATCTGAGAATTTGTACCCTGAGAGCTTCCAAAGCTCAGAGCATTCATTTTCCAGCACAGAAAGTTCAGCCCGGGAGACCAGTCTCCGGTCTTGCCTCAGCTCACGCGCCAATCGGTGGGACGGCCTGAGTCTCCCTATCGCCCTGCCCCGCCAGGGCGGCAAATGGGAAATAATCCCGAAATGGACTTGCGCACGTGAAAGCCCATTTTGTACATTATACTTCCCAAAGCATACCACCACCCAAACACCTACCCTCTGCTAGTTCAAGGCCTAGACTGCGGAGCAATGAAGACTCAAGAGGCTAGAGGTCTAGTGCCCCCTCTTCCTCCAAACTAGGGCCAGTTGCATCCACTTACCAGGTCTGTTTCCTCATTTGCATACCAAGCTGGCTGGACCAACCTCAGGATTTCCAAACCCAATTGTGCGTGGCATCATCTGGAGATCTCTCGATCTCGGCTCTTCTGCACAACTCAACTAATCTGACCCTCCTCAGCTAATCTGACCCTCCGCTTTATGCGGTAGAGTTTTCCAGAGCTGCCCCAGGGGGTTCTGGGGACATCAGGACCAAGACTTCGCTGACCCTGGCAGTCTGTGCACCGGAGTTGGCTCCTTTCCCTCTTAAACTTGTGCAAGAGATCGCTGAGCGATGAAGGTAGAATTATGGTCCTCCTTGCCCTTGCCTTTCCTTTTTGTGATCTCAAAGCATCCTCCCTCCGCCCCCATTCCATGGCCCCAGTTCCCTACTCCCACAGCTGTCTGCTGAAACTGCCAACATTACTCAATTGTTTCTGGGGGGAGGAACATTTTTTTTTGAAACAAAATAGATATATGAAACAGTACACGGGAATTAACACGAATATTTAAGGTAAAACATGACCTTGAAGATTATGAAATCCATCTTATTTTGGCCCAGAACGGGGGCATTGGGCTCCTTGGGCCATAGGGGAGCTGGGGAGGACAGGGTGAAGAGTTAGCTCTAAGCCCTCTGCTTGGAGATGCTGTAAATACAGAACGCAAAATCACCTTCGAAGTTAAAGACGCGAAGTTCTTCTTTACTCGGCCCCTCCTCCCCTCCCCCCCGCCAATTCCCTCCAGTTACAGCTAGCATCCAGGTCCCGGGAGGTGAAGAAGGAGACTTCGGCTCCAGTTACAGCTAGCATCCGGGTCCCGATTTAGAAGGAGCTGCCAATTACAGCGCGGTTCCAGGGCTGAGCAAAAAGCCTGAGGAGCCAAGTGGGAGAGGGAGTAAAACTACTGAATTGGGCCACAAGCAAATGAATAAACTGAACGACTCTTAACCAAACCTAATATATTTAATCCAAACACACAAGTCTTTCATTTCTTCCCTCCTCCCTTCCTTCTCTTACTCCCCAACACCCCCTCTTCAAGCACAATTAATTATATGGTTAGATTCTACTGCGTGATCAGCCCTGTTCTAGGTGGTGGGCACGCCAAGGTGAATGAGACCAAACAAGAGTCTTGCCCTCATGGGGTTTACATTTGGAGACAGAGTCGATCTGTTGCCCAACCTGGAGTGCAGTGGCGCGATCACAGCTCACTGCAGCCTCAAACTCCCTGGCTCAAGGGGTTCTCCCACCTGAGCCTCCCGACTAGCTGGGACCACAGGTGCACGCCACGACGCCTGGGTTTGTTTGTTTGTTTAATAGAGACGAAGGTCTCACCATGTTATCTGGGCTCAAGCGATCATCCCCCCTCCTCCTCCTAAAGTACTGGGATTACAGTCCCAAGCTATCTTGCCCGACCTGGGAAACAGACGTTAAGGAAGATAACAATCTATTTTCAGAGAGCGAGTTTATAAAACCAATGCAATGGGTAAATATGAAGTGTGAATAGGAGGAGAAGCTAAAGAGTGGTCGGAGAATCTAATGCAAGCTACGGGAGAAAGAAACTCAAGTGCAAATGCTGCCTCAGGAATAAACGTAAAAAGAGACTTTCAAGTGCAAATGCTCCCTCAGGAATAAAATAATCTTGAGACTCTCAAGTGTAAATGCTGCCTCGGGAGAACCGAACGGCGAGCTGGAGCCCATACGCAACGAGATTAGAGAGGAAGGCAGAAGCCAGAGCACATGAATAAATGAGCATCCATTTTGTTTCAGAAATGATCGGAAACCATTTGTGGGTTTGTAGAAGCAGGCATGCGTAGGGAAGCTACGGGATTCCGCCGAGGAGCGCCAGAGCCTGAGGCGCCCTTTGGTTATCGCAAGCTGGCTGGCTCACTCCGCACCAGGTGCAAAAGATGCCTGGGGATGCGGGAAGGGAAAGGCCACATCTTCACGCCTTCGCGCCTGGCATTGTGAGCAACCACTGAGACTCATTATATAACACTCGTTTTCTTCTTGCAACCCTGCGGGCCGCGCGGTCGCGCTTTCTCTGCCCTCCGCCGGGTGGACCTGGAGCGCTTGAGCGGTCGGCGCGCCTGGAGCAGCCAGGCGGGCAGTGGACTAGCTGCTGGACCAGGGAGGTGTGGGAGAGCGGTGGCGGCGGGTACATGCACGTGAAGCCATTGCGAGAACTTTATCCATAAGTATTTCAATGCCGGTAGGGACGGCAAGAGAGGAGGGCGGGATGTGCCACACATCTTTGACCTCAGGTTTCTAACGCCTGTTTTCTTTCTGCCCTCTGCAGACATCCCCGATTGAAAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTCA", "description": "ENST00000304494.10 chromosome:GRCh38:9:21967752:21974857:-1"}} \ No newline at end of file diff --git a/tests/data/ENST00000304494.10.sequence b/tests/data/ENST00000304494.10.sequence new file mode 100644 index 0000000..ec275fa --- /dev/null +++ b/tests/data/ENST00000304494.10.sequence @@ -0,0 +1 @@ +GAGAGCAGGCAGCGGGCGGCGGGGAGCAGCATGGAGCCGGCGGCGGGGAGCAGCATGGAGCCTTCGGCTGACTGGCTGGCCACGGCCGCGGCCCGGGGTCGGGTAGAGGAGGTGCGGGCGCTGCTGGAGGCGGGGGCGCTGCCCAACGCACCGAATAGTTACGGTCGGAGGCCGATCCAGGTGGGTAGAGGGTCTGCAGCGGGAGCAGGGGATGGCGGGCGACTCTGGAGGACGAAGTTTGCAGGGGAATTGGAATCAGGTAGCGCTTCGATTCTCCGGAAAAAGGGGAGGCTTCCTGGGGAGTTTTCAGAAGGGGTTTGTAATCACAGACCTCCTCCTGGCGACGCCCTGGGGGCTTGGGAAGCCAAGGAAGAGGAATGAGGAGCCACGCGCGTACAGATCTCTCGAATGCTGAGAAGATCTGAAGGGGGGAACATATTTGTATTAGATGGAAGTATGCTCTTTATCAGATACAAAATTTACGAACGTTTGGGATAAAAAGGGAGTCTTAAAGAAATGTAAGATGTGCTGGGACTACTTAGCCTCCAATTCACAGATACCTGGATGGAGCTTATCTTTCTTACTAGGAGGGATTATCAGTGGAAATCTGTGGTGTATGTTGGAATAAATATCGAATATAAATTTTGATCGAAATTATTCAGAAGCGGCCGGGCGCGGTGCCTCACGCCTTGTAATCCCTTCACTTTGGGAGATCAAGGCGGGGGGAATCACCTGAGGTCGGGAGTTCGAGACCAGCCTGGCCAACAGGTGAAACCTCGCCTCTACTAAAAATACAAAAAGTAGCCGGGGGTGGTGGCAGGCGCCTGTAATCCCAGCTACTCGGGAGGTTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCTGAGGTTGTAGTGAACAGCGAGATGGAGCCACTTCACTCCAGCCTGGGTGACAGAGTGAGACTTTGTCGAAAGAAAGAAAGAGAGAAAGAGAGAGAGAAAAATTATTCAGAAGCAACTACATATTGTGTTTATTTTTAACTGAGTAGGGCAAATAAATATATGTTTGCTGTAGGAACTTAGGAAATAATGAGCCACATTCATGTGATCATTCCAGAGGTAATATGTAGTTACCATTTTGGGAATATCTGCTAACATTTTTGCTCTTTTACTATCTTTAGCTTACTTGATATAGTTTATTTGTGATAAGAGTTTTCAATTCCTCATTTTTGAACAGAGGTGTTTCTCCTCTCCCTACTCCTGTTTTGTGAGGGAGTTAGGGGAGGATTTAAAAGTAATTAATACATGGGTAACTTAGCATCTCTAAAATTTTGCCAACAGCTTGAACCCGGGAGTTTGGCTTTGTAGTCCTACAATATCTTAGAAGAGACCTTATTTGTTTAAAAACAAAAAGGAAAAAGAAAAGTGGATAGTTTTGACAATTTTTAATGGAGACGGGAGAAGAACATGTAGAAAAGGGGAAATGATGTTGGCTTAGAATCCTAACTACATTGGTGTTTAATATAGGAACATTTATTTATATAACATTTTAAAGTACTAAATTCATATTAGTATATTATCAAATGGATATATTATCAAATGGGTTTAAGCATCCTACACATTTTAATTCAATTGATTCATTTTCTTTTTGCTTTGGATTTCTATCATGATTTAAATATTTACATATGGGTTACTTTTTAGATTTTTCATACTATGAAATATAAGAAAAACCTTTAAGGCTAGTTTTATGACCAAGACGAAGGACTTCATTGAATACACAAAACAATAAATATACTGCAACATTTTGTCTTTCTTTTTGTAGCTGCAATTTGGTTTGCTTATACTTTCTCTTTGTCTCTTTGAAAACTGAGTCAGTTTCACTTTCTCAGGACAGGATTTAATAACCATAATATAATTTAGTATAATTCCTTGATTTAGGCAAATTATGCAATTTGTGTTTAGTATGAAATGTACCTAAAAATAAGTAACTCCTCTTTAACACCACCATCCTCAAACTAATATAACAAATAACAGTTATCCTAAAATAAATTGTCTACTTCCACCATGCAGCACTCAAATTTTAAGGTTGCTATGACTGCAGACAGTATTTTAAAATTCCTCTCTGGAAATGGCTTTGTTTCCAAGATGATTTAGGAACCAAAGAGGTGACCATCTCTTGTTTAATGAACTCTCAAATCATAAACCTGGGAAGTGTTTTAGTTTCCTACTGCTGCTGTTACAAATTATCACAAATGTGTTAGCTAAAACAAACACAAAATTATTATTTTACAGTTCTAGAGATCAGAAGTCAAAAATGGGTCCACAAGGTTTCATTCCTTTTGGAAACTCTAAGGGGCAATCTGTTTCCTTGTCTTTTCCAGCTTCTAGTGACCATCAAATTCCTTGGCTCATGGTCTCTGTATTTTCTCTGTGGCCTGTGCTTCCATTCTTGTATCTTCTCTCTGACTGTGACCCTCTAATAAAAACACTTGGGGTTATGTTGGGCCCACCCTGAAAATTCTGGATAATCTCCCTCAAGACCATTAATTAAATCACATCTGCAAAGCCTCTTTTGCCACATAAGTTAATGTATTAAAAGTTTTTGAGGATTAGGACATAGACATTGGGGGTGGGGGGGCATTATTCAGCCTACCACAGGAAGGAATTTTAGGGTTAATTAAACTAGCCTTCTTATTTTATACTTGAAGAAATTGAAGTTTTGGAATTGGAGAGCATTATGCTAAATGAAATAAGCCAAACACAGAAAGACAAATATCACATGTTCTCACTTATCTGTGAAATATAAAACAATTACATTCTTAGCAGTAAAGAGTAGAATGGTGGTTACTAGAGCTGGGGGGTGGGAGGAATGGGGAGATGGTAATCAAGATATAAAGCCTCAGTTAAGATGGGAGGAATAAGTTTGATTGTTTTTTTTGAGATGTGTTTCATAGCATGATGAATATAGCTAAATAGTAAATCCCAAATGCTCTCATTTGACAAAAATGTCAAATATTTGAGATGATGGATAGGTTACTTAGCTTGACTTAATAATTCCCCATTGTGTTCAAAGATCATAACTTCATATTGTACCACATAAATATATACAACTGTACTATCCCAATATATAATTTTAAAACTAATATAATGAAAAAGAAATTGAAGTTCAACATTCCCAGAAGCTAAGTGTAACTTAAAAGTTTTGTGAGAATTTGTTTTAACAAACAAACAAGTTTTCTCTTTTTAACAATTACCACATTCTGCGCTTGGATATACAGCAGTGAACAAAAAAAAAAAAAAAAATCTCCAGGCCTAACATAATTTCAGGAAGAAATTTCAGTAGTTGTATCTCAGGGGAAATACAGGAAGTTAGCCTGGAGTAAAAGTCAGTCTGTCCCTGCCCCTTTGCTATTTTGCCCGTGCCTCACAGTGCTCTCTGCCTGTGACGACAGCTCCGCAGAAGTTCGGAGGATATAATGGAATTCATTGTGTACTGAAGAATGGATAGAGAACTCAAGAAGGAAATTGGAAACTGGAAGCAAATGTAGGGGTAATTAGACACCTGGGGCTTGTGTGGGGGTCTGCTTGGCGGTGAGGGGGCTCTACACAAGCTTCCTTTCCGTCATGCCGGCCCCCACCCTGGCTCTGACCATTCTGTTCTCTCTGGCAGGTCATGATGATGGGCAGCGCCCGAGTGGCGGAGCTGCTGCTGCTCCACGGCGCGGAGCCCAACTGCGCCGACCCCGCCACTCTCACCCGACCCGTGCACGACGCTGCCCGGGAGGGCTTCCTGGACACGCTGGTGGTGCTGCACCGGGCCGGGGCGCGGCTGGACGTGCGCGATGCCTGGGGCCGTCTGCCCGTGGACCTGGCTGAGGAGCTGGGCCATCGCGATGTCGCACGGTACCTGCGCGCGGCTGCGGGGGGCACCAGAGGCAGTAACCATGCCCGCATAGATGCCGCGGAAGGTCCCTCAGGTGAGGACTGATGATCTGAGAATTTGTACCCTGAGAGCTTCCAAAGCTCAGAGCATTCATTTTCCAGCACAGAAAGTTCAGCCCGGGAGACCAGTCTCCGGTCTTGCCTCAGCTCACGCGCCAATCGGTGGGACGGCCTGAGTCTCCCTATCGCCCTGCCCCGCCAGGGCGGCAAATGGGAAATAATCCCGAAATGGACTTGCGCACGTGAAAGCCCATTTTGTACATTATACTTCCCAAAGCATACCACCACCCAAACACCTACCCTCTGCTAGTTCAAGGCCTAGACTGCGGAGCAATGAAGACTCAAGAGGCTAGAGGTCTAGTGCCCCCTCTTCCTCCAAACTAGGGCCAGTTGCATCCACTTACCAGGTCTGTTTCCTCATTTGCATACCAAGCTGGCTGGACCAACCTCAGGATTTCCAAACCCAATTGTGCGTGGCATCATCTGGAGATCTCTCGATCTCGGCTCTTCTGCACAACTCAACTAATCTGACCCTCCTCAGCTAATCTGACCCTCCGCTTTATGCGGTAGAGTTTTCCAGAGCTGCCCCAGGGGGTTCTGGGGACATCAGGACCAAGACTTCGCTGACCCTGGCAGTCTGTGCACCGGAGTTGGCTCCTTTCCCTCTTAAACTTGTGCAAGAGATCGCTGAGCGATGAAGGTAGAATTATGGTCCTCCTTGCCCTTGCCTTTCCTTTTTGTGATCTCAAAGCATCCTCCCTCCGCCCCCATTCCATGGCCCCAGTTCCCTACTCCCACAGCTGTCTGCTGAAACTGCCAACATTACTCAATTGTTTCTGGGGGGAGGAACATTTTTTTTTGAAACAAAATAGATATATGAAACAGTACACGGGAATTAACACGAATATTTAAGGTAAAACATGACCTTGAAGATTATGAAATCCATCTTATTTTGGCCCAGAACGGGGGCATTGGGCTCCTTGGGCCATAGGGGAGCTGGGGAGGACAGGGTGAAGAGTTAGCTCTAAGCCCTCTGCTTGGAGATGCTGTAAATACAGAACGCAAAATCACCTTCGAAGTTAAAGACGCGAAGTTCTTCTTTACTCGGCCCCTCCTCCCCTCCCCCCCGCCAATTCCCTCCAGTTACAGCTAGCATCCAGGTCCCGGGAGGTGAAGAAGGAGACTTCGGCTCCAGTTACAGCTAGCATCCGGGTCCCGATTTAGAAGGAGCTGCCAATTACAGCGCGGTTCCAGGGCTGAGCAAAAAGCCTGAGGAGCCAAGTGGGAGAGGGAGTAAAACTACTGAATTGGGCCACAAGCAAATGAATAAACTGAACGACTCTTAACCAAACCTAATATATTTAATCCAAACACACAAGTCTTTCATTTCTTCCCTCCTCCCTTCCTTCTCTTACTCCCCAACACCCCCTCTTCAAGCACAATTAATTATATGGTTAGATTCTACTGCGTGATCAGCCCTGTTCTAGGTGGTGGGCACGCCAAGGTGAATGAGACCAAACAAGAGTCTTGCCCTCATGGGGTTTACATTTGGAGACAGAGTCGATCTGTTGCCCAACCTGGAGTGCAGTGGCGCGATCACAGCTCACTGCAGCCTCAAACTCCCTGGCTCAAGGGGTTCTCCCACCTGAGCCTCCCGACTAGCTGGGACCACAGGTGCACGCCACGACGCCTGGGTTTGTTTGTTTGTTTAATAGAGACGAAGGTCTCACCATGTTATCTGGGCTCAAGCGATCATCCCCCCTCCTCCTCCTAAAGTACTGGGATTACAGTCCCAAGCTATCTTGCCCGACCTGGGAAACAGACGTTAAGGAAGATAACAATCTATTTTCAGAGAGCGAGTTTATAAAACCAATGCAATGGGTAAATATGAAGTGTGAATAGGAGGAGAAGCTAAAGAGTGGTCGGAGAATCTAATGCAAGCTACGGGAGAAAGAAACTCAAGTGCAAATGCTGCCTCAGGAATAAACGTAAAAAGAGACTTTCAAGTGCAAATGCTCCCTCAGGAATAAAATAATCTTGAGACTCTCAAGTGTAAATGCTGCCTCGGGAGAACCGAACGGCGAGCTGGAGCCCATACGCAACGAGATTAGAGAGGAAGGCAGAAGCCAGAGCACATGAATAAATGAGCATCCATTTTGTTTCAGAAATGATCGGAAACCATTTGTGGGTTTGTAGAAGCAGGCATGCGTAGGGAAGCTACGGGATTCCGCCGAGGAGCGCCAGAGCCTGAGGCGCCCTTTGGTTATCGCAAGCTGGCTGGCTCACTCCGCACCAGGTGCAAAAGATGCCTGGGGATGCGGGAAGGGAAAGGCCACATCTTCACGCCTTCGCGCCTGGCATTGTGAGCAACCACTGAGACTCATTATATAACACTCGTTTTCTTCTTGCAACCCTGCGGGCCGCGCGGTCGCGCTTTCTCTGCCCTCCGCCGGGTGGACCTGGAGCGCTTGAGCGGTCGGCGCGCCTGGAGCAGCCAGGCGGGCAGTGGACTAGCTGCTGGACCAGGGAGGTGTGGGAGAGCGGTGGCGGCGGGTACATGCACGTGAAGCCATTGCGAGAACTTTATCCATAAGTATTTCAATGCCGGTAGGGACGGCAAGAGAGGAGGGCGGGATGTGCCACACATCTTTGACCTCAGGTTTCTAACGCCTGTTTTCTTTCTGCCCTCTGCAGACATCCCCGATTGAAAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTCA \ No newline at end of file diff --git a/tests/data/ENST00000304494.10.tark.model.json b/tests/data/ENST00000304494.10.tark.model.json new file mode 100644 index 0000000..e93748a --- /dev/null +++ b/tests/data/ENST00000304494.10.tark.model.json @@ -0,0 +1,133 @@ +{ + "annotations": { + "id": "9", + "type": "record", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 21967751 + }, + "end": { + "type": "point", + "position": 21974857 + } + }, + "features": [ + { + "id": "ENSG00000147889", + "type": "gene", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 21967751 + }, + "end": { + "type": "point", + "position": 21995301 + }, + "strand": -1 + }, + "qualifiers": { + "name": "CDKN2A", + "assembly_name": "GRCh38", + "version": "18" + }, + "features": [ + { + "id": "ENST00000304494", + "type": "mRNA", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 21967751 + }, + "end": { + "type": "point", + "position": 21974857 + }, + "strand": -1 + }, + "qualifiers": { + "assembly_name": "GRCh38", + "tag": "basic", + "version": "10" + }, + "features": [ + { + "id": "ENSE00001833804", + "type": "exon", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 21974677 + }, + "end": { + "type": "point", + "position": 21974857 + }, + "strand": -1 + } + }, + { + "id": "ENSE00003496053", + "type": "exon", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 21970901 + }, + "end": { + "type": "point", + "position": 21971208 + }, + "strand": -1 + } + }, + { + "id": "ENSE00003529527", + "type": "exon", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 21967751 + }, + "end": { + "type": "point", + "position": 21968242 + }, + "strand": -1 + } + }, + { + "id": "ENSP00000307101", + "type": "CDS", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 21968228 + }, + "end": { + "type": "point", + "position": 21974827 + }, + "strand": -1 + } + } + ] + } + ] + } + ] + }, + "sequence": { + "seq": "GAGAGCAGGCAGCGGGCGGCGGGGAGCAGCATGGAGCCGGCGGCGGGGAGCAGCATGGAGCCTTCGGCTGACTGGCTGGCCACGGCCGCGGCCCGGGGTCGGGTAGAGGAGGTGCGGGCGCTGCTGGAGGCGGGGGCGCTGCCCAACGCACCGAATAGTTACGGTCGGAGGCCGATCCAGGTGGGTAGAGGGTCTGCAGCGGGAGCAGGGGATGGCGGGCGACTCTGGAGGACGAAGTTTGCAGGGGAATTGGAATCAGGTAGCGCTTCGATTCTCCGGAAAAAGGGGAGGCTTCCTGGGGAGTTTTCAGAAGGGGTTTGTAATCACAGACCTCCTCCTGGCGACGCCCTGGGGGCTTGGGAAGCCAAGGAAGAGGAATGAGGAGCCACGCGCGTACAGATCTCTCGAATGCTGAGAAGATCTGAAGGGGGGAACATATTTGTATTAGATGGAAGTATGCTCTTTATCAGATACAAAATTTACGAACGTTTGGGATAAAAAGGGAGTCTTAAAGAAATGTAAGATGTGCTGGGACTACTTAGCCTCCAATTCACAGATACCTGGATGGAGCTTATCTTTCTTACTAGGAGGGATTATCAGTGGAAATCTGTGGTGTATGTTGGAATAAATATCGAATATAAATTTTGATCGAAATTATTCAGAAGCGGCCGGGCGCGGTGCCTCACGCCTTGTAATCCCTTCACTTTGGGAGATCAAGGCGGGGGGAATCACCTGAGGTCGGGAGTTCGAGACCAGCCTGGCCAACAGGTGAAACCTCGCCTCTACTAAAAATACAAAAAGTAGCCGGGGGTGGTGGCAGGCGCCTGTAATCCCAGCTACTCGGGAGGTTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCTGAGGTTGTAGTGAACAGCGAGATGGAGCCACTTCACTCCAGCCTGGGTGACAGAGTGAGACTTTGTCGAAAGAAAGAAAGAGAGAAAGAGAGAGAGAAAAATTATTCAGAAGCAACTACATATTGTGTTTATTTTTAACTGAGTAGGGCAAATAAATATATGTTTGCTGTAGGAACTTAGGAAATAATGAGCCACATTCATGTGATCATTCCAGAGGTAATATGTAGTTACCATTTTGGGAATATCTGCTAACATTTTTGCTCTTTTACTATCTTTAGCTTACTTGATATAGTTTATTTGTGATAAGAGTTTTCAATTCCTCATTTTTGAACAGAGGTGTTTCTCCTCTCCCTACTCCTGTTTTGTGAGGGAGTTAGGGGAGGATTTAAAAGTAATTAATACATGGGTAACTTAGCATCTCTAAAATTTTGCCAACAGCTTGAACCCGGGAGTTTGGCTTTGTAGTCCTACAATATCTTAGAAGAGACCTTATTTGTTTAAAAACAAAAAGGAAAAAGAAAAGTGGATAGTTTTGACAATTTTTAATGGAGACGGGAGAAGAACATGTAGAAAAGGGGAAATGATGTTGGCTTAGAATCCTAACTACATTGGTGTTTAATATAGGAACATTTATTTATATAACATTTTAAAGTACTAAATTCATATTAGTATATTATCAAATGGATATATTATCAAATGGGTTTAAGCATCCTACACATTTTAATTCAATTGATTCATTTTCTTTTTGCTTTGGATTTCTATCATGATTTAAATATTTACATATGGGTTACTTTTTAGATTTTTCATACTATGAAATATAAGAAAAACCTTTAAGGCTAGTTTTATGACCAAGACGAAGGACTTCATTGAATACACAAAACAATAAATATACTGCAACATTTTGTCTTTCTTTTTGTAGCTGCAATTTGGTTTGCTTATACTTTCTCTTTGTCTCTTTGAAAACTGAGTCAGTTTCACTTTCTCAGGACAGGATTTAATAACCATAATATAATTTAGTATAATTCCTTGATTTAGGCAAATTATGCAATTTGTGTTTAGTATGAAATGTACCTAAAAATAAGTAACTCCTCTTTAACACCACCATCCTCAAACTAATATAACAAATAACAGTTATCCTAAAATAAATTGTCTACTTCCACCATGCAGCACTCAAATTTTAAGGTTGCTATGACTGCAGACAGTATTTTAAAATTCCTCTCTGGAAATGGCTTTGTTTCCAAGATGATTTAGGAACCAAAGAGGTGACCATCTCTTGTTTAATGAACTCTCAAATCATAAACCTGGGAAGTGTTTTAGTTTCCTACTGCTGCTGTTACAAATTATCACAAATGTGTTAGCTAAAACAAACACAAAATTATTATTTTACAGTTCTAGAGATCAGAAGTCAAAAATGGGTCCACAAGGTTTCATTCCTTTTGGAAACTCTAAGGGGCAATCTGTTTCCTTGTCTTTTCCAGCTTCTAGTGACCATCAAATTCCTTGGCTCATGGTCTCTGTATTTTCTCTGTGGCCTGTGCTTCCATTCTTGTATCTTCTCTCTGACTGTGACCCTCTAATAAAAACACTTGGGGTTATGTTGGGCCCACCCTGAAAATTCTGGATAATCTCCCTCAAGACCATTAATTAAATCACATCTGCAAAGCCTCTTTTGCCACATAAGTTAATGTATTAAAAGTTTTTGAGGATTAGGACATAGACATTGGGGGTGGGGGGGCATTATTCAGCCTACCACAGGAAGGAATTTTAGGGTTAATTAAACTAGCCTTCTTATTTTATACTTGAAGAAATTGAAGTTTTGGAATTGGAGAGCATTATGCTAAATGAAATAAGCCAAACACAGAAAGACAAATATCACATGTTCTCACTTATCTGTGAAATATAAAACAATTACATTCTTAGCAGTAAAGAGTAGAATGGTGGTTACTAGAGCTGGGGGGTGGGAGGAATGGGGAGATGGTAATCAAGATATAAAGCCTCAGTTAAGATGGGAGGAATAAGTTTGATTGTTTTTTTTGAGATGTGTTTCATAGCATGATGAATATAGCTAAATAGTAAATCCCAAATGCTCTCATTTGACAAAAATGTCAAATATTTGAGATGATGGATAGGTTACTTAGCTTGACTTAATAATTCCCCATTGTGTTCAAAGATCATAACTTCATATTGTACCACATAAATATATACAACTGTACTATCCCAATATATAATTTTAAAACTAATATAATGAAAAAGAAATTGAAGTTCAACATTCCCAGAAGCTAAGTGTAACTTAAAAGTTTTGTGAGAATTTGTTTTAACAAACAAACAAGTTTTCTCTTTTTAACAATTACCACATTCTGCGCTTGGATATACAGCAGTGAACAAAAAAAAAAAAAAAAATCTCCAGGCCTAACATAATTTCAGGAAGAAATTTCAGTAGTTGTATCTCAGGGGAAATACAGGAAGTTAGCCTGGAGTAAAAGTCAGTCTGTCCCTGCCCCTTTGCTATTTTGCCCGTGCCTCACAGTGCTCTCTGCCTGTGACGACAGCTCCGCAGAAGTTCGGAGGATATAATGGAATTCATTGTGTACTGAAGAATGGATAGAGAACTCAAGAAGGAAATTGGAAACTGGAAGCAAATGTAGGGGTAATTAGACACCTGGGGCTTGTGTGGGGGTCTGCTTGGCGGTGAGGGGGCTCTACACAAGCTTCCTTTCCGTCATGCCGGCCCCCACCCTGGCTCTGACCATTCTGTTCTCTCTGGCAGGTCATGATGATGGGCAGCGCCCGAGTGGCGGAGCTGCTGCTGCTCCACGGCGCGGAGCCCAACTGCGCCGACCCCGCCACTCTCACCCGACCCGTGCACGACGCTGCCCGGGAGGGCTTCCTGGACACGCTGGTGGTGCTGCACCGGGCCGGGGCGCGGCTGGACGTGCGCGATGCCTGGGGCCGTCTGCCCGTGGACCTGGCTGAGGAGCTGGGCCATCGCGATGTCGCACGGTACCTGCGCGCGGCTGCGGGGGGCACCAGAGGCAGTAACCATGCCCGCATAGATGCCGCGGAAGGTCCCTCAGGTGAGGACTGATGATCTGAGAATTTGTACCCTGAGAGCTTCCAAAGCTCAGAGCATTCATTTTCCAGCACAGAAAGTTCAGCCCGGGAGACCAGTCTCCGGTCTTGCCTCAGCTCACGCGCCAATCGGTGGGACGGCCTGAGTCTCCCTATCGCCCTGCCCCGCCAGGGCGGCAAATGGGAAATAATCCCGAAATGGACTTGCGCACGTGAAAGCCCATTTTGTACATTATACTTCCCAAAGCATACCACCACCCAAACACCTACCCTCTGCTAGTTCAAGGCCTAGACTGCGGAGCAATGAAGACTCAAGAGGCTAGAGGTCTAGTGCCCCCTCTTCCTCCAAACTAGGGCCAGTTGCATCCACTTACCAGGTCTGTTTCCTCATTTGCATACCAAGCTGGCTGGACCAACCTCAGGATTTCCAAACCCAATTGTGCGTGGCATCATCTGGAGATCTCTCGATCTCGGCTCTTCTGCACAACTCAACTAATCTGACCCTCCTCAGCTAATCTGACCCTCCGCTTTATGCGGTAGAGTTTTCCAGAGCTGCCCCAGGGGGTTCTGGGGACATCAGGACCAAGACTTCGCTGACCCTGGCAGTCTGTGCACCGGAGTTGGCTCCTTTCCCTCTTAAACTTGTGCAAGAGATCGCTGAGCGATGAAGGTAGAATTATGGTCCTCCTTGCCCTTGCCTTTCCTTTTTGTGATCTCAAAGCATCCTCCCTCCGCCCCCATTCCATGGCCCCAGTTCCCTACTCCCACAGCTGTCTGCTGAAACTGCCAACATTACTCAATTGTTTCTGGGGGGAGGAACATTTTTTTTTGAAACAAAATAGATATATGAAACAGTACACGGGAATTAACACGAATATTTAAGGTAAAACATGACCTTGAAGATTATGAAATCCATCTTATTTTGGCCCAGAACGGGGGCATTGGGCTCCTTGGGCCATAGGGGAGCTGGGGAGGACAGGGTGAAGAGTTAGCTCTAAGCCCTCTGCTTGGAGATGCTGTAAATACAGAACGCAAAATCACCTTCGAAGTTAAAGACGCGAAGTTCTTCTTTACTCGGCCCCTCCTCCCCTCCCCCCCGCCAATTCCCTCCAGTTACAGCTAGCATCCAGGTCCCGGGAGGTGAAGAAGGAGACTTCGGCTCCAGTTACAGCTAGCATCCGGGTCCCGATTTAGAAGGAGCTGCCAATTACAGCGCGGTTCCAGGGCTGAGCAAAAAGCCTGAGGAGCCAAGTGGGAGAGGGAGTAAAACTACTGAATTGGGCCACAAGCAAATGAATAAACTGAACGACTCTTAACCAAACCTAATATATTTAATCCAAACACACAAGTCTTTCATTTCTTCCCTCCTCCCTTCCTTCTCTTACTCCCCAACACCCCCTCTTCAAGCACAATTAATTATATGGTTAGATTCTACTGCGTGATCAGCCCTGTTCTAGGTGGTGGGCACGCCAAGGTGAATGAGACCAAACAAGAGTCTTGCCCTCATGGGGTTTACATTTGGAGACAGAGTCGATCTGTTGCCCAACCTGGAGTGCAGTGGCGCGATCACAGCTCACTGCAGCCTCAAACTCCCTGGCTCAAGGGGTTCTCCCACCTGAGCCTCCCGACTAGCTGGGACCACAGGTGCACGCCACGACGCCTGGGTTTGTTTGTTTGTTTAATAGAGACGAAGGTCTCACCATGTTATCTGGGCTCAAGCGATCATCCCCCCTCCTCCTCCTAAAGTACTGGGATTACAGTCCCAAGCTATCTTGCCCGACCTGGGAAACAGACGTTAAGGAAGATAACAATCTATTTTCAGAGAGCGAGTTTATAAAACCAATGCAATGGGTAAATATGAAGTGTGAATAGGAGGAGAAGCTAAAGAGTGGTCGGAGAATCTAATGCAAGCTACGGGAGAAAGAAACTCAAGTGCAAATGCTGCCTCAGGAATAAACGTAAAAAGAGACTTTCAAGTGCAAATGCTCCCTCAGGAATAAAATAATCTTGAGACTCTCAAGTGTAAATGCTGCCTCGGGAGAACCGAACGGCGAGCTGGAGCCCATACGCAACGAGATTAGAGAGGAAGGCAGAAGCCAGAGCACATGAATAAATGAGCATCCATTTTGTTTCAGAAATGATCGGAAACCATTTGTGGGTTTGTAGAAGCAGGCATGCGTAGGGAAGCTACGGGATTCCGCCGAGGAGCGCCAGAGCCTGAGGCGCCCTTTGGTTATCGCAAGCTGGCTGGCTCACTCCGCACCAGGTGCAAAAGATGCCTGGGGATGCGGGAAGGGAAAGGCCACATCTTCACGCCTTCGCGCCTGGCATTGTGAGCAACCACTGAGACTCATTATATAACACTCGTTTTCTTCTTGCAACCCTGCGGGCCGCGCGGTCGCGCTTTCTCTGCCCTCCGCCGGGTGGACCTGGAGCGCTTGAGCGGTCGGCGCGCCTGGAGCAGCCAGGCGGGCAGTGGACTAGCTGCTGGACCAGGGAGGTGTGGGAGAGCGGTGGCGGCGGGTACATGCACGTGAAGCCATTGCGAGAACTTTATCCATAAGTATTTCAATGCCGGTAGGGACGGCAAGAGAGGAGGGCGGGATGTGCCACACATCTTTGACCTCAGGTTTCTAACGCCTGTTTTCTTTCTGCCCTCTGCAGACATCCCCGATTGAAAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTCA", + "description": "ENST00000304494.10 chromosome:GRCh38:9:21967752:21974857:-1" + } +} \ No newline at end of file diff --git a/tests/data/ENST00000304494.10.tark_raw.json b/tests/data/ENST00000304494.10.tark_raw.json new file mode 100644 index 0000000..a8581cf --- /dev/null +++ b/tests/data/ENST00000304494.10.tark_raw.json @@ -0,0 +1 @@ +{"count": 1, "next": null, "previous": null, "results": [{"stable_id": "ENST00000304494", "stable_id_version": 10, "assembly": "GRCh38", "loc_start": 21967752, "loc_end": 21974857, "loc_strand": -1, "loc_region": "9", "loc_checksum": "F8C16E95E11B4DBAD97B78D927A868E7D1C8F08E", "exon_set_checksum": "C0DB9C7510347D5058F8D6A0103B25CEB7A5B7F2", "transcript_checksum": "C744046AE49A2A1FCAFE45B5E1DB219BD8D1E710", "sequence": {"sequence": "GAGAGCAGGCAGCGGGCGGCGGGGAGCAGCATGGAGCCGGCGGCGGGGAGCAGCATGGAGCCTTCGGCTGACTGGCTGGCCACGGCCGCGGCCCGGGGTCGGGTAGAGGAGGTGCGGGCGCTGCTGGAGGCGGGGGCGCTGCCCAACGCACCGAATAGTTACGGTCGGAGGCCGATCCAGGTCATGATGATGGGCAGCGCCCGAGTGGCGGAGCTGCTGCTGCTCCACGGCGCGGAGCCCAACTGCGCCGACCCCGCCACTCTCACCCGACCCGTGCACGACGCTGCCCGGGAGGGCTTCCTGGACACGCTGGTGGTGCTGCACCGGGCCGGGGCGCGGCTGGACGTGCGCGATGCCTGGGGCCGTCTGCCCGTGGACCTGGCTGAGGAGCTGGGCCATCGCGATGTCGCACGGTACCTGCGCGCGGCTGCGGGGGGCACCAGAGGCAGTAACCATGCCCGCATAGATGCCGCGGAAGGTCCCTCAGACATCCCCGATTGAAAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTCA", "seq_checksum": "790809E0CD049743CFC6338048865F256AA64D3F"}, "biotype": "protein_coding", "three_prime_utr_start": 21968228, "three_prime_utr_end": 21967752, "three_prime_utr_seq": "AAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTCA", "three_prime_utr_checksum": "04EBC613707A51244E4725491FF9A67F674274E3", "five_prime_utr_start": 21974857, "five_prime_utr_end": 21974828, "five_prime_utr_seq": "GAGAGCAGGCAGCGGGCGGCGGGGAGCAGC", "five_prime_utr_checksum": "33BCE24430AB5E5064811BCAF5AFFB9DD9FB1620", "translations": [{"stable_id": "ENSP00000307101", "stable_id_version": 5, "assembly": "GRCh38", "loc_start": 21968229, "loc_end": 21974827, "loc_strand": -1, "loc_region": "9", "loc_checksum": "105755D5C4F7A1A01F50DDF4A451CEE8004002A2", "translation_id": 176189, "translation_checksum": "1C48A45C9C622B7CFD8CD09854FF8D7DBD3D1BB2"}], "genes": [{"stable_id": "ENSG00000147889", "stable_id_version": 18, "assembly": "GRCh38", "loc_start": 21967752, "loc_end": 21995301, "loc_strand": -1, "loc_region": "9", "loc_checksum": "ED15957CEA6CCF777595B51C1CB825B646B57347", "name": "CDKN2A", "gene_checksum": "AEEBF97735FA3B5F87BE93069A64D1C96734A339"}], "exons": [{"exon_id": 57506579, "stable_id": "ENSE00001833804", "stable_id_version": 2, "assembly": "GRCh38", "loc_start": 21974678, "loc_end": 21974857, "loc_strand": -1, "loc_region": "9", "loc_checksum": "8784F1FDE2A72E091CBD1198F21D74B2A43E28B7", "exon_checksum": "11B77CAE42B359F07D425E90D7B2958419F6009D", "exon_order": 1}, {"exon_id": 2185928, "stable_id": "ENSE00003496053", "stable_id_version": 1, "assembly": "GRCh38", "loc_start": 21970902, "loc_end": 21971208, "loc_strand": -1, "loc_region": "9", "loc_checksum": "4F25E374539A33AA0B4AD698A0011A58DC547920", "exon_checksum": "15440459F0F3192E523F6122E233671C0D5B5466", "exon_order": 2}, {"exon_id": 57506621, "stable_id": "ENSE00003529527", "stable_id_version": 2, "assembly": "GRCh38", "loc_start": 21967752, "loc_end": 21968242, "loc_strand": -1, "loc_region": "9", "loc_checksum": "695C89D1D5FD5C182A5AB67FCF461C8915DB0F25", "exon_checksum": "2C6867673825E460D80853A074DA46A6824434A5", "exon_order": 3}], "mane_transcript": "NM_000077.5", "mane_transcript_type": "MANE SELECT", "cds_info": {"translation_start": 21968229, "translation_end": 21974827, "three_prime_utr_start": 21968228, "three_prime_utr_end": 21967752, "three_prime_utr_seq": "AAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTCA", "three_prime_utr_length": 477, "five_prime_utr_start": 21974857, "five_prime_utr_end": 21974828, "five_prime_utr_seq": "GAGAGCAGGCAGCGGGCGGCGGGGAGCAGC", "five_prime_utr_length": 30, "loc_region": "9", "loc_strand": -1, "cds_seq": "ATGGAGCCGGCGGCGGGGAGCAGCATGGAGCCTTCGGCTGACTGGCTGGCCACGGCCGCGGCCCGGGGTCGGGTAGAGGAGGTGCGGGCGCTGCTGGAGGCGGGGGCGCTGCCCAACGCACCGAATAGTTACGGTCGGAGGCCGATCCAGGTCATGATGATGGGCAGCGCCCGAGTGGCGGAGCTGCTGCTGCTCCACGGCGCGGAGCCCAACTGCGCCGACCCCGCCACTCTCACCCGACCCGTGCACGACGCTGCCCGGGAGGGCTTCCTGGACACGCTGGTGGTGCTGCACCGGGCCGGGGCGCGGCTGGACGTGCGCGATGCCTGGGGCCGTCTGCCCGTGGACCTGGCTGAGGAGCTGGGCCATCGCGATGTCGCACGGTACCTGCGCGCGGCTGCGGGGGGCACCAGAGGCAGTAACCATGCCCGCATAGATGCCGCGGAAGGTCCCTCAGACATCCCCGATTGA"}}]} \ No newline at end of file diff --git a/tests/data/ENST00000304494.5.rest_raw.model.json b/tests/data/ENST00000304494.5.rest_raw.model.json new file mode 100644 index 0000000..b9ba016 --- /dev/null +++ b/tests/data/ENST00000304494.5.rest_raw.model.json @@ -0,0 +1 @@ +{"species":"homo_sapiens","start":21967752,"strand":-1,"end":21975097,"object_type":"Transcript","version":5,"source":"ensembl_havana","Translation":{"Parent":"ENST00000304494","version":5,"id":"ENSP00000307101","length":156,"start":21968228,"db_type":"core","species":"homo_sapiens","object_type":"Translation","end":21974826},"is_canonical":0,"display_name":"CDKN2A-001","length":1218,"id":"ENST00000304494","db_type":"core","seq_region_name":"9","Exon":[{"seq_region_name":"9","db_type":"core","start":21974677,"species":"homo_sapiens","strand":-1,"end":21975097,"object_type":"Exon","version":1,"assembly_name":"GRCh37","id":"ENSE00001833804"},{"assembly_name":"GRCh37","id":"ENSE00003496053","version":1,"end":21971207,"object_type":"Exon","start":21970901,"species":"homo_sapiens","db_type":"core","seq_region_name":"9","strand":-1},{"assembly_name":"GRCh37","id":"ENSE00003529527","version":1,"object_type":"Exon","end":21968241,"strand":-1,"seq_region_name":"9","db_type":"core","species":"homo_sapiens","start":21967752}],"logic_name":"ensembl_havana_transcript","Parent":"ENSG00000147889","biotype":"protein_coding","assembly_name":"GRCh37"} \ No newline at end of file diff --git a/tests/data/ENST00000304494.5.tark.model.json b/tests/data/ENST00000304494.5.tark.model.json new file mode 100644 index 0000000..88a4bd0 --- /dev/null +++ b/tests/data/ENST00000304494.5.tark.model.json @@ -0,0 +1 @@ +{"annotations": {"id": "9", "type": "record", "location": {"type": "range", "start": {"type": "point", "position": 21967751}, "end": {"type": "point", "position": 21975097}}, "features": [{"id": "ENSG00000147889", "type": "gene", "location": {"type": "range", "start": {"type": "point", "position": 21967750}, "end": {"type": "point", "position": 21995300}, "strand": -1}, "qualifiers": {"assembly_name": "GRCh37", "version": "12", "name": "CDKN2A"}, "features": [{"id": "ENST00000304494", "type": "mRNA", "location": {"type": "range", "start": {"type": "point", "position": 21967751}, "end": {"type": "point", "position": 21975097}, "strand": -1}, "qualifiers": {"assembly_name": "GRCh37", "version": "5", "tag": "basic"}, "features": [{"id": "ENSE00001833804", "type": "exon", "location": {"type": "range", "start": {"type": "point", "position": 21974676}, "end": {"type": "point", "position": 21975097}, "strand": -1}}, {"id": "ENSE00003496053", "type": "exon", "location": {"type": "range", "start": {"type": "point", "position": 21970900}, "end": {"type": "point", "position": 21971207}, "strand": -1}}, {"id": "ENSE00003529527", "type": "exon", "location": {"type": "range", "start": {"type": "point", "position": 21967751}, "end": {"type": "point", "position": 21968241}, "strand": -1}}, {"id": "ENSP00000307101", "type": "CDS", "location": {"type": "range", "start": {"type": "point", "position": 21968227}, "end": {"type": "point", "position": 21974826}, "strand": -1}}]}]}]}, "sequence": {"seq": "CCAACCTGGGGCGACTTCAGGGGTGCCACATTCGCTAAGTGCTCGGAGTTAATAGCACCTCCTCCGAGCACTCGCTCACGGCGTCCCCTTGCCTGGAAAGATACCGCGGTCCCTCCAGAGGATTTGAGGGACAGGGTCGGAGGGGGCTCTTCCGCCAGCACCGGAGGAAGAAAGAGGAGGGGCTGGCTGGTCACCAGAGGGTGGGGCGGACCGCGTGCGCTCGGCGGCTGCGGAGAGGGGGAGAGCAGGCAGCGGGCGGCGGGGAGCAGCATGGAGCCGGCGGCGGGGAGCAGCATGGAGCCTTCGGCTGACTGGCTGGCCACGGCCGCGGCCCGGGGTCGGGTAGAGGAGGTGCGGGCGCTGCTGGAGGCGGGGGCGCTGCCCAACGCACCGAATAGTTACGGTCGGAGGCCGATCCAGGTGGGTAGAGGGTCTGCAGCGGGAGCAGGGGATGGCGGGCGACTCTGGAGGACGAAGTTTGCAGGGGAATTGGAATCAGGTAGCGCTTCGATTCTCCGGAAAAAGGGGAGGCTTCCTGGGGAGTTTTCAGAAGGGGTTTGTAATCACAGACCTCCTCCTGGCGACGCCCTGGGGGCTTGGGAAGCCAAGGAAGAGGAATGAGGAGCCACGCGCGTACAGATCTCTCGAATGCTGAGAAGATCTGAAGGGGGGAACATATTTGTATTAGATGGAAGTATGCTCTTTATCAGATACAAAATTTACGAACGTTTGGGATAAAAAGGGAGTCTTAAAGAAATGTAAGATGTGCTGGGACTACTTAGCCTCCAATTCACAGATACCTGGATGGAGCTTATCTTTCTTACTAGGAGGGATTATCAGTGGAAATCTGTGGTGTATGTTGGAATAAATATCGAATATAAATTTTGATCGAAATTATTCAGAAGCGGCCGGGCGCGGTGCCTCACGCCTTGTAATCCCTTCACTTTGGGAGATCAAGGCGGGGGGAATCACCTGAGGTCGGGAGTTCGAGACCAGCCTGGCCAACAGGTGAAACCTCGCCTCTACTAAAAATACAAAAAGTAGCCGGGGGTGGTGGCAGGCGCCTGTAATCCCAGCTACTCGGGAGGTTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCTGAGGTTGTAGTGAACAGCGAGATGGAGCCACTTCACTCCAGCCTGGGTGACAGAGTGAGACTTTGTCGAAAGAAAGAAAGAGAGAAAGAGAGAGAGAAAAATTATTCAGAAGCAACTACATATTGTGTTTATTTTTAACTGAGTAGGGCAAATAAATATATGTTTGCTGTAGGAACTTAGGAAATAATGAGCCACATTCATGTGATCATTCCAGAGGTAATATGTAGTTACCATTTTGGGAATATCTGCTAACATTTTTGCTCTTTTACTATCTTTAGCTTACTTGATATAGTTTATTTGTGATAAGAGTTTTCAATTCCTCATTTTTGAACAGAGGTGTTTCTCCTCTCCCTACTCCTGTTTTGTGAGGGAGTTAGGGGAGGATTTAAAAGTAATTAATACATGGGTAACTTAGCATCTCTAAAATTTTGCCAACAGCTTGAACCCGGGAGTTTGGCTTTGTAGTCCTACAATATCTTAGAAGAGACCTTATTTGTTTAAAAACAAAAAGGAAAAAGAAAAGTGGATAGTTTTGACAATTTTTAATGGAGACGGGAGAAGAACATGTAGAAAAGGGGAAATGATGTTGGCTTAGAATCCTAACTACATTGGTGTTTAATATAGGAACATTTATTTATATAACATTTTAAAGTACTAAATTCATATTAGTATATTATCAAATGGATATATTATCAAATGGGTTTAAGCATCCTACACATTTTAATTCAATTGATTCATTTTCTTTTTGCTTTGGATTTCTATCATGATTTAAATATTTACATATGGGTTACTTTTTAGATTTTTCATACTATGAAATATAAGAAAAACCTTTAAGGCTAGTTTTATGACCAAGACGAAGGACTTCATTGAATACACAAAACAATAAATATACTGCAACATTTTGTCTTTCTTTTTGTAGCTGCAATTTGGTTTGCTTATACTTTCTCTTTGTCTCTTTGAAAACTGAGTCAGTTTCACTTTCTCAGGACAGGATTTAATAACCATAATATAATTTAGTATAATTCCTTGATTTAGGCAAATTATGCAATTTGTGTTTAGTATGAAATGTACCTAAAAATAAGTAACTCCTCTTTAACACCACCATCCTCAAACTAATATAACAAATAACAGTTATCCTAAAATAAATTGTCTACTTCCACCATGCAGCACTCAAATTTTAAGGTTGCTATGACTGCAGACAGTATTTTAAAATTCCTCTCTGGAAATGGCTTTGTTTCCAAGATGATTTAGGAACCAAAGAGGTGACCATCTCTTGTTTAATGAACTCTCAAATCATAAACCTGGGAAGTGTTTTAGTTTCCTACTGCTGCTGTTACAAATTATCACAAATGTGTTAGCTAAAACAAACACAAAATTATTATTTTACAGTTCTAGAGATCAGAAGTCAAAAATGGGTCCACAAGGTTTCATTCCTTTTGGAAACTCTAAGGGGCAATCTGTTTCCTTGTCTTTTCCAGCTTCTAGTGACCATCAAATTCCTTGGCTCATGGTCTCTGTATTTTCTCTGTGGCCTGTGCTTCCATTCTTGTATCTTCTCTCTGACTGTGACCCTCTAATAAAAACACTTGGGGTTATGTTGGGCCCACCCTGAAAATTCTGGATAATCTCCCTCAAGACCATTAATTAAATCACATCTGCAAAGCCTCTTTTGCCACATAAGTTAATGTATTAAAAGTTTTTGAGGATTAGGACATAGACATTGGGGGTGGGGGGGCATTATTCAGCCTACCACAGGAAGGAATTTTAGGGTTAATTAAACTAGCCTTCTTATTTTATACTTGAAGAAATTGAAGTTTTGGAATTGGAGAGCATTATGCTAAATGAAATAAGCCAAACACAGAAAGACAAATATCACATGTTCTCACTTATCTGTGAAATATAAAACAATTACATTCTTAGCAGTAAAGAGTAGAATGGTGGTTACTAGAGCTGGGGGGTGGGAGGAATGGGGAGATGGTAATCAAGATATAAAGCCTCAGTTAAGATGGGAGGAATAAGTTTGATTGTTTTTTTTGAGATGTGTTTCATAGCATGATGAATATAGCTAAATAGTAAATCCCAAATGCTCTCATTTGACAAAAATGTCAAATATTTGAGATGATGGATAGGTTACTTAGCTTGACTTAATAATTCCCCATTGTGTTCAAAGATCATAACTTCATATTGTACCACATAAATATATACAACTGTACTATCCCAATATATAATTTTAAAACTAATATAATGAAAAAGAAATTGAAGTTCAACATTCCCAGAAGCTAAGTGTAACTTAAAAGTTTTGTGAGAATTTGTTTTAACAAACAAACAAGTTTTCTCTTTTTAACAATTACCACATTCTGCGCTTGGATATACAGCAGTGAACAAAAAAAAAAAAAAAAATCTCCAGGCCTAACATAATTTCAGGAAGAAATTTCAGTAGTTGTATCTCAGGGGAAATACAGGAAGTTAGCCTGGAGTAAAAGTCAGTCTGTCCCTGCCCCTTTGCTATTTTGCCCGTGCCTCACAGTGCTCTCTGCCTGTGACGACAGCTCCGCAGAAGTTCGGAGGATATAATGGAATTCATTGTGTACTGAAGAATGGATAGAGAACTCAAGAAGGAAATTGGAAACTGGAAGCAAATGTAGGGGTAATTAGACACCTGGGGCTTGTGTGGGGGTCTGCTTGGCGGTGAGGGGGCTCTACACAAGCTTCCTTTCCGTCATGCCGGCCCCCACCCTGGCTCTGACCATTCTGTTCTCTCTGGCAGGTCATGATGATGGGCAGCGCCCGAGTGGCGGAGCTGCTGCTGCTCCACGGCGCGGAGCCCAACTGCGCCGACCCCGCCACTCTCACCCGACCCGTGCACGACGCTGCCCGGGAGGGCTTCCTGGACACGCTGGTGGTGCTGCACCGGGCCGGGGCGCGGCTGGACGTGCGCGATGCCTGGGGCCGTCTGCCCGTGGACCTGGCTGAGGAGCTGGGCCATCGCGATGTCGCACGGTACCTGCGCGCGGCTGCGGGGGGCACCAGAGGCAGTAACCATGCCCGCATAGATGCCGCGGAAGGTCCCTCAGGTGAGGACTGATGATCTGAGAATTTGTACCCTGAGAGCTTCCAAAGCTCAGAGCATTCATTTTCCAGCACAGAAAGTTCAGCCCGGGAGACCAGTCTCCGGTCTTGCCTCAGCTCACGCGCCAATCGGTGGGACGGCCTGAGTCTCCCTATCGCCCTGCCCCGCCAGGGCGGCAAATGGGAAATAATCCCGAAATGGACTTGCGCACGTGAAAGCCCATTTTGTACATTATACTTCCCAAAGCATACCACCACCCAAACACCTACCCTCTGCTAGTTCAAGGCCTAGACTGCGGAGCAATGAAGACTCAAGAGGCTAGAGGTCTAGTGCCCCCTCTTCCTCCAAACTAGGGCCAGTTGCATCCACTTACCAGGTCTGTTTCCTCATTTGCATACCAAGCTGGCTGGACCAACCTCAGGATTTCCAAACCCAATTGTGCGTGGCATCATCTGGAGATCTCTCGATCTCGGCTCTTCTGCACAACTCAACTAATCTGACCCTCCTCAGCTAATCTGACCCTCCGCTTTATGCGGTAGAGTTTTCCAGAGCTGCCCCAGGGGGTTCTGGGGACATCAGGACCAAGACTTCGCTGACCCTGGCAGTCTGTGCACCGGAGTTGGCTCCTTTCCCTCTTAAACTTGTGCAAGAGATCGCTGAGCGATGAAGGTAGAATTATGGTCCTCCTTGCCCTTGCCTTTCCTTTTTGTGATCTCAAAGCATCCTCCCTCCGCCCCCATTCCATGGCCCCAGTTCCCTACTCCCACAGCTGTCTGCTGAAACTGCCAACATTACTCAATTGTTTCTGGGGGGAGGAACATTTTTTTTTGAAACAAAATAGATATATGAAACAGTACACGGGAATTAACACGAATATTTAAGGTAAAACATGACCTTGAAGATTATGAAATCCATCTTATTTTGGCCCAGAACGGGGGCATTGGGCTCCTTGGGCCATAGGGGAGCTGGGGAGGACAGGGTGAAGAGTTAGCTCTAAGCCCTCTGCTTGGAGATGCTGTAAATACAGAACGCAAAATCACCTTCGAAGTTAAAGACGCGAAGTTCTTCTTTACTCGGCCCCTCCTCCCCTCCCCCCCGCCAATTCCCTCCAGTTACAGCTAGCATCCAGGTCCCGGGAGGTGAAGAAGGAGACTTCGGCTCCAGTTACAGCTAGCATCCGGGTCCCGATTTAGAAGGAGCTGCCAATTACAGCGCGGTTCCAGGGCTGAGCAAAAAGCCTGAGGAGCCAAGTGGGAGAGGGAGTAAAACTACTGAATTGGGCCACAAGCAAATGAATAAACTGAACGACTCTTAACCAAACCTAATATATTTAATCCAAACACACAAGTCTTTCATTTCTTCCCTCCTCCCTTCCTTCTCTTACTCCCCAACACCCCCTCTTCAAGCACAATTAATTATATGGTTAGATTCTACTGCGTGATCAGCCCTGTTCTAGGTGGTGGGCACGCCAAGGTGAATGAGACCAAACAAGAGTCTTGCCCTCATGGGGTTTACATTTGGAGACAGAGTCGATCTGTTGCCCAACCTGGAGTGCAGTGGCGCGATCACAGCTCACTGCAGCCTCAAACTCCCTGGCTCAAGGGGTTCTCCCACCTGAGCCTCCCGACTAGCTGGGACCACAGGTGCACGCCACGACGCCTGGGTTTGTTTGTTTGTTTAATAGAGACGAAGGTCTCACCATGTTATCTGGGCTCAAGCGATCATCCCCCCTCCTCCTCCTAAAGTACTGGGATTACAGTCCCAAGCTATCTTGCCCGACCTGGGAAACAGACGTTAAGGAAGATAACAATCTATTTTCAGAGAGCGAGTTTATAAAACCAATGCAATGGGTAAATATGAAGTGTGAATAGGAGGAGAAGCTAAAGAGTGGTCGGAGAATCTAATGCAAGCTACGGGAGAAAGAAACTCAAGTGCAAATGCTGCCTCAGGAATAAACGTAAAAAGAGACTTTCAAGTGCAAATGCTCCCTCAGGAATAAAATAATCTTGAGACTCTCAAGTGTAAATGCTGCCTCGGGAGAACCGAACGGCGAGCTGGAGCCCATACGCAACGAGATTAGAGAGGAAGGCAGAAGCCAGAGCACATGAATAAATGAGCATCCATTTTGTTTCAGAAATGATCGGAAACCATTTGTGGGTTTGTAGAAGCAGGCATGCGTAGGGAAGCTACGGGATTCCGCCGAGGAGCGCCAGAGCCTGAGGCGCCCTTTGGTTATCGCAAGCTGGCTGGCTCACTCCGCACCAGGTGCAAAAGATGCCTGGGGATGCGGGAAGGGAAAGGCCACATCTTCACGCCTTCGCGCCTGGCATTGTGAGCAACCACTGAGACTCATTATATAACACTCGTTTTCTTCTTGCAACCCTGCGGGCCGCGCGGTCGCGCTTTCTCTGCCCTCCGCCGGGTGGACCTGGAGCGCTTGAGCGGTCGGCGCGCCTGGAGCAGCCAGGCGGGCAGTGGACTAGCTGCTGGACCAGGGAGGTGTGGGAGAGCGGTGGCGGCGGGTACATGCACGTGAAGCCATTGCGAGAACTTTATCCATAAGTATTTCAATGCCGGTAGGGACGGCAAGAGAGGAGGGCGGGATGTGCCACACATCTTTGACCTCAGGTTTCTAACGCCTGTTTTCTTTCTGCCCTCTGCAGACATCCCCGATTGAAAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTCA", "description": "ENST00000304494.5 chromosome:GRCh38:9:21967752:21975097:-1"}} \ No newline at end of file diff --git a/tests/data/ENST00000304494.5.tark_raw.model.json b/tests/data/ENST00000304494.5.tark_raw.model.json new file mode 100644 index 0000000..12b3d05 --- /dev/null +++ b/tests/data/ENST00000304494.5.tark_raw.model.json @@ -0,0 +1,113 @@ +{ + "count": 1, + "next": null, + "previous": null, + "results": [ + { + "stable_id": "ENST00000304494", + "stable_id_version": 5, + "assembly": "GRCh37", + "loc_start": 21967752, + "loc_end": 21975097, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "AF14A7EF08A2D083221C5CBE5A5BDB640DAFE864", + "exon_set_checksum": "0DF94634690292BFF3D0BF54F52DE3FB99F961AC", + "transcript_checksum": "57E5BEB8F356FD0B891766B34FE69835AE1FA4A7", + "sequence": "333B6F94ADA93A896B846BC5C7AEB96E94820586", + "biotype": "protein_coding", + "three_prime_utr_start": 21968227, + "three_prime_utr_end": 21967752, + "three_prime_utr_seq": "AAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTC", + "three_prime_utr_checksum": "70052E171F9EE5D232FF603466B86D58860234A6", + "five_prime_utr_start": 21975097, + "five_prime_utr_end": 21974827, + "five_prime_utr_seq": "CCCAACCTGGGGCGACTTCAGGGGTGCCACATTCGCTAAGTGCTCGGAGTTAATAGCACCTCCTCCGAGCACTCGCTCACGGCGTCCCCTTGCCTGGAAAGATACCGCGGTCCCTCCAGAGGATTTGAGGGACAGGGTCGGAGGGGGCTCTTCCGCCAGCACCGGAGGAAGAAAGAGGAGGGGCTGGCTGGTCACCAGAGGGTGGGGCGGACCGCGTGCGCTCGGCGGCTGCGGAGAGGGGGAGAGCAGGCAGCGGGCGGCGGGGAGCAGC", + "five_prime_utr_checksum": "DFA62E34AC3102492C6D9F0FB409A82BD35936D9", + "genes": [ + { + "stable_id": "ENSG00000147889", + "stable_id_version": 12, + "assembly": "GRCh37", + "loc_start": 21967751, + "loc_end": 21995300, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "58E323587CE57142F1F3B26CBD6346E915A35E8B", + "name": "CDKN2A", + "gene_checksum": "56F297F48E4337D90960A7AF5639779653D60AF1" + } + ], + "translations": [ + { + "stable_id": "ENSP00000307101", + "stable_id_version": 5, + "assembly": "GRCh37", + "loc_start": 21968228, + "loc_end": 21974826, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "53ABB47C63DC3808B670BB795618EF59A3C0A556", + "translation_id": 42167, + "translation_checksum": "067C584C442586AC7AC9A41E97E315F35252A2B7" + } + ], + "exons": [ + { + "exon_id": 529814, + "stable_id": "ENSE00001833804", + "stable_id_version": 1, + "assembly": "GRCh37", + "loc_start": 21974677, + "loc_end": 21975097, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "C255F59467B7CB8DBBFEC1721238AE59672C5111", + "exon_checksum": "8BE4D9EC25BE9807A32411548222A935D9E67164", + "exon_order": 1 + }, + { + "exon_id": 529840, + "stable_id": "ENSE00003496053", + "stable_id_version": 1, + "assembly": "GRCh37", + "loc_start": 21970901, + "loc_end": 21971207, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "E030192BDB1208BA52FDC1D5207105CE6D6DBF22", + "exon_checksum": "2F7F1BE16B489A003836875E13CCDE09A38A94E9", + "exon_order": 2 + }, + { + "exon_id": 529861, + "stable_id": "ENSE00003529527", + "stable_id_version": 1, + "assembly": "GRCh37", + "loc_start": 21967752, + "loc_end": 21968241, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "D2764CB80F524FAE6C7A3E8DB9291DCFA0CC2777", + "exon_checksum": "A084AF11FBED4FC441D2BF67885D2A648680F277", + "exon_order": 3 + } + ], + "cds_info": { + "translation_start": 21968228, + "translation_end": 21974826, + "three_prime_utr_start": 21968227, + "three_prime_utr_end": 21967752, + "three_prime_utr_seq": "AAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTC", + "three_prime_utr_length": 476, + "five_prime_utr_start": 21975097, + "five_prime_utr_end": 21974827, + "five_prime_utr_seq": "CCCAACCTGGGGCGACTTCAGGGGTGCCACATTCGCTAAGTGCTCGGAGTTAATAGCACCTCCTCCGAGCACTCGCTCACGGCGTCCCCTTGCCTGGAAAGATACCGCGGTCCCTCCAGAGGATTTGAGGGACAGGGTCGGAGGGGGCTCTTCCGCCAGCACCGGAGGAAGAAAGAGGAGGGGCTGGCTGGTCACCAGAGGGTGGGGCGGACCGCGTGCGCTCGGCGGCTGCGGAGAGGGGGAGAGCAGGCAGCGGGCGGCGGGGAGCAGC", + "five_prime_utr_length": 271, + "loc_region": "9", + "loc_strand": -1, + "cds_seq": null + } + } + ] + } \ No newline at end of file diff --git a/tests/data/ENST00000304494.7.tark_raw.model.json b/tests/data/ENST00000304494.7.tark_raw.model.json new file mode 100644 index 0000000..a02152a --- /dev/null +++ b/tests/data/ENST00000304494.7.tark_raw.model.json @@ -0,0 +1,113 @@ +{ + "count": 1, + "next": null, + "previous": null, + "results": [ + { + "stable_id": "ENST00000304494", + "stable_id_version": 7, + "assembly": "GRCh38", + "loc_start": 21967753, + "loc_end": 21975098, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "EB92F4E6FD93748288E662923ED722E1DFD217CE", + "exon_set_checksum": "EA54C10367494ACFCE2C62B8AA9C769F786177BA", + "transcript_checksum": "D89AED3D3AB6D7B76DD7DEC4F551DA6D44396E55", + "sequence": "333B6F94ADA93A896B846BC5C7AEB96E94820586", + "biotype": "protein_coding", + "three_prime_utr_start": 21968228, + "three_prime_utr_end": 21967753, + "three_prime_utr_seq": "AAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTC", + "three_prime_utr_checksum": "70052E171F9EE5D232FF603466B86D58860234A6", + "five_prime_utr_start": 21975098, + "five_prime_utr_end": 21974828, + "five_prime_utr_seq": "CCCAACCTGGGGCGACTTCAGGGGTGCCACATTCGCTAAGTGCTCGGAGTTAATAGCACCTCCTCCGAGCACTCGCTCACGGCGTCCCCTTGCCTGGAAAGATACCGCGGTCCCTCCAGAGGATTTGAGGGACAGGGTCGGAGGGGGCTCTTCCGCCAGCACCGGAGGAAGAAAGAGGAGGGGCTGGCTGGTCACCAGAGGGTGGGGCGGACCGCGTGCGCTCGGCGGCTGCGGAGAGGGGGAGAGCAGGCAGCGGGCGGCGGGGAGCAGC", + "five_prime_utr_checksum": "DFA62E34AC3102492C6D9F0FB409A82BD35936D9", + "genes": [ + { + "stable_id": "ENSG00000147889", + "stable_id_version": 14, + "assembly": "GRCh38", + "loc_start": 21967753, + "loc_end": 21995301, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "B3E7AB60256CD76F74C1C722A11970E2F7703772", + "name": "CDKN2A", + "gene_checksum": "8012D9147D7D0119B3DD16952CED05AA878A6E25" + } + ], + "translations": [ + { + "stable_id": "ENSP00000307101", + "stable_id_version": 5, + "assembly": "GRCh38", + "loc_start": 21968229, + "loc_end": 21974827, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "105755D5C4F7A1A01F50DDF4A451CEE8004002A2", + "translation_id": 176189, + "translation_checksum": "1C48A45C9C622B7CFD8CD09854FF8D7DBD3D1BB2" + } + ], + "exons": [ + { + "exon_id": 2185905, + "stable_id": "ENSE00001833804", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 21974678, + "loc_end": 21975098, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "EE821B3A3ED8D819F162D8D69EF6A075A838360C", + "exon_checksum": "C390D4A5F64CD3AB88BEAA1BD3830EA847A60726", + "exon_order": 1 + }, + { + "exon_id": 2185928, + "stable_id": "ENSE00003496053", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 21970902, + "loc_end": 21971208, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "4F25E374539A33AA0B4AD698A0011A58DC547920", + "exon_checksum": "15440459F0F3192E523F6122E233671C0D5B5466", + "exon_order": 2 + }, + { + "exon_id": 2185952, + "stable_id": "ENSE00003529527", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 21967753, + "loc_end": 21968242, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "D4F0BD05A559F79A9EB1822B6292BA756AF0C109", + "exon_checksum": "56E5BF98EFE9F3A667B8E34C15D1EB420267ABE0", + "exon_order": 3 + } + ], + "cds_info": { + "translation_start": 21968229, + "translation_end": 21974827, + "three_prime_utr_start": 21968228, + "three_prime_utr_end": 21967753, + "three_prime_utr_seq": "AAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTC", + "three_prime_utr_length": 476, + "five_prime_utr_start": 21975098, + "five_prime_utr_end": 21974828, + "five_prime_utr_seq": "CCCAACCTGGGGCGACTTCAGGGGTGCCACATTCGCTAAGTGCTCGGAGTTAATAGCACCTCCTCCGAGCACTCGCTCACGGCGTCCCCTTGCCTGGAAAGATACCGCGGTCCCTCCAGAGGATTTGAGGGACAGGGTCGGAGGGGGCTCTTCCGCCAGCACCGGAGGAAGAAAGAGGAGGGGCTGGCTGGTCACCAGAGGGTGGGGCGGACCGCGTGCGCTCGGCGGCTGCGGAGAGGGGGAGAGCAGGCAGCGGGCGGCGGGGAGCAGC", + "five_prime_utr_length": 271, + "loc_region": "9", + "loc_strand": -1, + "cds_seq": null + } + } + ] +} \ No newline at end of file diff --git a/tests/data/ENST00000304494.rest_raw.json b/tests/data/ENST00000304494.rest_raw.json new file mode 100644 index 0000000..afaccef --- /dev/null +++ b/tests/data/ENST00000304494.rest_raw.json @@ -0,0 +1 @@ +{"assembly_name":"GRCh38","db_type":"core","biotype":"protein_coding","length":978,"end":21974857,"start":21967752,"Translation":{"species":"homo_sapiens","version":5,"Parent":"ENST00000304494","end":21974827,"start":21968229,"object_type":"Translation","db_type":"core","length":156,"id":"ENSP00000307101"},"version":10,"source":"ensembl_havana","strand":-1,"logic_name":"ensembl_havana_transcript_homo_sapiens","object_type":"Transcript","id":"ENST00000304494","display_name":"CDKN2A-201","is_canonical":1,"Parent":"ENSG00000147889","species":"homo_sapiens","Exon":[{"seq_region_name":"9","species":"homo_sapiens","version":2,"end":21974857,"start":21974678,"object_type":"Exon","strand":-1,"db_type":"core","assembly_name":"GRCh38","id":"ENSE00001833804"},{"version":1,"species":"homo_sapiens","seq_region_name":"9","id":"ENSE00003496053","object_type":"Exon","assembly_name":"GRCh38","strand":-1,"db_type":"core","start":21970902,"end":21971208},{"end":21968242,"start":21967752,"object_type":"Exon","db_type":"core","strand":-1,"assembly_name":"GRCh38","id":"ENSE00003529527","seq_region_name":"9","species":"homo_sapiens","version":2}],"seq_region_name":"9"} \ No newline at end of file diff --git a/tests/data/ENST00000304494.sequence b/tests/data/ENST00000304494.sequence new file mode 100644 index 0000000..ec275fa --- /dev/null +++ b/tests/data/ENST00000304494.sequence @@ -0,0 +1 @@ +GAGAGCAGGCAGCGGGCGGCGGGGAGCAGCATGGAGCCGGCGGCGGGGAGCAGCATGGAGCCTTCGGCTGACTGGCTGGCCACGGCCGCGGCCCGGGGTCGGGTAGAGGAGGTGCGGGCGCTGCTGGAGGCGGGGGCGCTGCCCAACGCACCGAATAGTTACGGTCGGAGGCCGATCCAGGTGGGTAGAGGGTCTGCAGCGGGAGCAGGGGATGGCGGGCGACTCTGGAGGACGAAGTTTGCAGGGGAATTGGAATCAGGTAGCGCTTCGATTCTCCGGAAAAAGGGGAGGCTTCCTGGGGAGTTTTCAGAAGGGGTTTGTAATCACAGACCTCCTCCTGGCGACGCCCTGGGGGCTTGGGAAGCCAAGGAAGAGGAATGAGGAGCCACGCGCGTACAGATCTCTCGAATGCTGAGAAGATCTGAAGGGGGGAACATATTTGTATTAGATGGAAGTATGCTCTTTATCAGATACAAAATTTACGAACGTTTGGGATAAAAAGGGAGTCTTAAAGAAATGTAAGATGTGCTGGGACTACTTAGCCTCCAATTCACAGATACCTGGATGGAGCTTATCTTTCTTACTAGGAGGGATTATCAGTGGAAATCTGTGGTGTATGTTGGAATAAATATCGAATATAAATTTTGATCGAAATTATTCAGAAGCGGCCGGGCGCGGTGCCTCACGCCTTGTAATCCCTTCACTTTGGGAGATCAAGGCGGGGGGAATCACCTGAGGTCGGGAGTTCGAGACCAGCCTGGCCAACAGGTGAAACCTCGCCTCTACTAAAAATACAAAAAGTAGCCGGGGGTGGTGGCAGGCGCCTGTAATCCCAGCTACTCGGGAGGTTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCTGAGGTTGTAGTGAACAGCGAGATGGAGCCACTTCACTCCAGCCTGGGTGACAGAGTGAGACTTTGTCGAAAGAAAGAAAGAGAGAAAGAGAGAGAGAAAAATTATTCAGAAGCAACTACATATTGTGTTTATTTTTAACTGAGTAGGGCAAATAAATATATGTTTGCTGTAGGAACTTAGGAAATAATGAGCCACATTCATGTGATCATTCCAGAGGTAATATGTAGTTACCATTTTGGGAATATCTGCTAACATTTTTGCTCTTTTACTATCTTTAGCTTACTTGATATAGTTTATTTGTGATAAGAGTTTTCAATTCCTCATTTTTGAACAGAGGTGTTTCTCCTCTCCCTACTCCTGTTTTGTGAGGGAGTTAGGGGAGGATTTAAAAGTAATTAATACATGGGTAACTTAGCATCTCTAAAATTTTGCCAACAGCTTGAACCCGGGAGTTTGGCTTTGTAGTCCTACAATATCTTAGAAGAGACCTTATTTGTTTAAAAACAAAAAGGAAAAAGAAAAGTGGATAGTTTTGACAATTTTTAATGGAGACGGGAGAAGAACATGTAGAAAAGGGGAAATGATGTTGGCTTAGAATCCTAACTACATTGGTGTTTAATATAGGAACATTTATTTATATAACATTTTAAAGTACTAAATTCATATTAGTATATTATCAAATGGATATATTATCAAATGGGTTTAAGCATCCTACACATTTTAATTCAATTGATTCATTTTCTTTTTGCTTTGGATTTCTATCATGATTTAAATATTTACATATGGGTTACTTTTTAGATTTTTCATACTATGAAATATAAGAAAAACCTTTAAGGCTAGTTTTATGACCAAGACGAAGGACTTCATTGAATACACAAAACAATAAATATACTGCAACATTTTGTCTTTCTTTTTGTAGCTGCAATTTGGTTTGCTTATACTTTCTCTTTGTCTCTTTGAAAACTGAGTCAGTTTCACTTTCTCAGGACAGGATTTAATAACCATAATATAATTTAGTATAATTCCTTGATTTAGGCAAATTATGCAATTTGTGTTTAGTATGAAATGTACCTAAAAATAAGTAACTCCTCTTTAACACCACCATCCTCAAACTAATATAACAAATAACAGTTATCCTAAAATAAATTGTCTACTTCCACCATGCAGCACTCAAATTTTAAGGTTGCTATGACTGCAGACAGTATTTTAAAATTCCTCTCTGGAAATGGCTTTGTTTCCAAGATGATTTAGGAACCAAAGAGGTGACCATCTCTTGTTTAATGAACTCTCAAATCATAAACCTGGGAAGTGTTTTAGTTTCCTACTGCTGCTGTTACAAATTATCACAAATGTGTTAGCTAAAACAAACACAAAATTATTATTTTACAGTTCTAGAGATCAGAAGTCAAAAATGGGTCCACAAGGTTTCATTCCTTTTGGAAACTCTAAGGGGCAATCTGTTTCCTTGTCTTTTCCAGCTTCTAGTGACCATCAAATTCCTTGGCTCATGGTCTCTGTATTTTCTCTGTGGCCTGTGCTTCCATTCTTGTATCTTCTCTCTGACTGTGACCCTCTAATAAAAACACTTGGGGTTATGTTGGGCCCACCCTGAAAATTCTGGATAATCTCCCTCAAGACCATTAATTAAATCACATCTGCAAAGCCTCTTTTGCCACATAAGTTAATGTATTAAAAGTTTTTGAGGATTAGGACATAGACATTGGGGGTGGGGGGGCATTATTCAGCCTACCACAGGAAGGAATTTTAGGGTTAATTAAACTAGCCTTCTTATTTTATACTTGAAGAAATTGAAGTTTTGGAATTGGAGAGCATTATGCTAAATGAAATAAGCCAAACACAGAAAGACAAATATCACATGTTCTCACTTATCTGTGAAATATAAAACAATTACATTCTTAGCAGTAAAGAGTAGAATGGTGGTTACTAGAGCTGGGGGGTGGGAGGAATGGGGAGATGGTAATCAAGATATAAAGCCTCAGTTAAGATGGGAGGAATAAGTTTGATTGTTTTTTTTGAGATGTGTTTCATAGCATGATGAATATAGCTAAATAGTAAATCCCAAATGCTCTCATTTGACAAAAATGTCAAATATTTGAGATGATGGATAGGTTACTTAGCTTGACTTAATAATTCCCCATTGTGTTCAAAGATCATAACTTCATATTGTACCACATAAATATATACAACTGTACTATCCCAATATATAATTTTAAAACTAATATAATGAAAAAGAAATTGAAGTTCAACATTCCCAGAAGCTAAGTGTAACTTAAAAGTTTTGTGAGAATTTGTTTTAACAAACAAACAAGTTTTCTCTTTTTAACAATTACCACATTCTGCGCTTGGATATACAGCAGTGAACAAAAAAAAAAAAAAAAATCTCCAGGCCTAACATAATTTCAGGAAGAAATTTCAGTAGTTGTATCTCAGGGGAAATACAGGAAGTTAGCCTGGAGTAAAAGTCAGTCTGTCCCTGCCCCTTTGCTATTTTGCCCGTGCCTCACAGTGCTCTCTGCCTGTGACGACAGCTCCGCAGAAGTTCGGAGGATATAATGGAATTCATTGTGTACTGAAGAATGGATAGAGAACTCAAGAAGGAAATTGGAAACTGGAAGCAAATGTAGGGGTAATTAGACACCTGGGGCTTGTGTGGGGGTCTGCTTGGCGGTGAGGGGGCTCTACACAAGCTTCCTTTCCGTCATGCCGGCCCCCACCCTGGCTCTGACCATTCTGTTCTCTCTGGCAGGTCATGATGATGGGCAGCGCCCGAGTGGCGGAGCTGCTGCTGCTCCACGGCGCGGAGCCCAACTGCGCCGACCCCGCCACTCTCACCCGACCCGTGCACGACGCTGCCCGGGAGGGCTTCCTGGACACGCTGGTGGTGCTGCACCGGGCCGGGGCGCGGCTGGACGTGCGCGATGCCTGGGGCCGTCTGCCCGTGGACCTGGCTGAGGAGCTGGGCCATCGCGATGTCGCACGGTACCTGCGCGCGGCTGCGGGGGGCACCAGAGGCAGTAACCATGCCCGCATAGATGCCGCGGAAGGTCCCTCAGGTGAGGACTGATGATCTGAGAATTTGTACCCTGAGAGCTTCCAAAGCTCAGAGCATTCATTTTCCAGCACAGAAAGTTCAGCCCGGGAGACCAGTCTCCGGTCTTGCCTCAGCTCACGCGCCAATCGGTGGGACGGCCTGAGTCTCCCTATCGCCCTGCCCCGCCAGGGCGGCAAATGGGAAATAATCCCGAAATGGACTTGCGCACGTGAAAGCCCATTTTGTACATTATACTTCCCAAAGCATACCACCACCCAAACACCTACCCTCTGCTAGTTCAAGGCCTAGACTGCGGAGCAATGAAGACTCAAGAGGCTAGAGGTCTAGTGCCCCCTCTTCCTCCAAACTAGGGCCAGTTGCATCCACTTACCAGGTCTGTTTCCTCATTTGCATACCAAGCTGGCTGGACCAACCTCAGGATTTCCAAACCCAATTGTGCGTGGCATCATCTGGAGATCTCTCGATCTCGGCTCTTCTGCACAACTCAACTAATCTGACCCTCCTCAGCTAATCTGACCCTCCGCTTTATGCGGTAGAGTTTTCCAGAGCTGCCCCAGGGGGTTCTGGGGACATCAGGACCAAGACTTCGCTGACCCTGGCAGTCTGTGCACCGGAGTTGGCTCCTTTCCCTCTTAAACTTGTGCAAGAGATCGCTGAGCGATGAAGGTAGAATTATGGTCCTCCTTGCCCTTGCCTTTCCTTTTTGTGATCTCAAAGCATCCTCCCTCCGCCCCCATTCCATGGCCCCAGTTCCCTACTCCCACAGCTGTCTGCTGAAACTGCCAACATTACTCAATTGTTTCTGGGGGGAGGAACATTTTTTTTTGAAACAAAATAGATATATGAAACAGTACACGGGAATTAACACGAATATTTAAGGTAAAACATGACCTTGAAGATTATGAAATCCATCTTATTTTGGCCCAGAACGGGGGCATTGGGCTCCTTGGGCCATAGGGGAGCTGGGGAGGACAGGGTGAAGAGTTAGCTCTAAGCCCTCTGCTTGGAGATGCTGTAAATACAGAACGCAAAATCACCTTCGAAGTTAAAGACGCGAAGTTCTTCTTTACTCGGCCCCTCCTCCCCTCCCCCCCGCCAATTCCCTCCAGTTACAGCTAGCATCCAGGTCCCGGGAGGTGAAGAAGGAGACTTCGGCTCCAGTTACAGCTAGCATCCGGGTCCCGATTTAGAAGGAGCTGCCAATTACAGCGCGGTTCCAGGGCTGAGCAAAAAGCCTGAGGAGCCAAGTGGGAGAGGGAGTAAAACTACTGAATTGGGCCACAAGCAAATGAATAAACTGAACGACTCTTAACCAAACCTAATATATTTAATCCAAACACACAAGTCTTTCATTTCTTCCCTCCTCCCTTCCTTCTCTTACTCCCCAACACCCCCTCTTCAAGCACAATTAATTATATGGTTAGATTCTACTGCGTGATCAGCCCTGTTCTAGGTGGTGGGCACGCCAAGGTGAATGAGACCAAACAAGAGTCTTGCCCTCATGGGGTTTACATTTGGAGACAGAGTCGATCTGTTGCCCAACCTGGAGTGCAGTGGCGCGATCACAGCTCACTGCAGCCTCAAACTCCCTGGCTCAAGGGGTTCTCCCACCTGAGCCTCCCGACTAGCTGGGACCACAGGTGCACGCCACGACGCCTGGGTTTGTTTGTTTGTTTAATAGAGACGAAGGTCTCACCATGTTATCTGGGCTCAAGCGATCATCCCCCCTCCTCCTCCTAAAGTACTGGGATTACAGTCCCAAGCTATCTTGCCCGACCTGGGAAACAGACGTTAAGGAAGATAACAATCTATTTTCAGAGAGCGAGTTTATAAAACCAATGCAATGGGTAAATATGAAGTGTGAATAGGAGGAGAAGCTAAAGAGTGGTCGGAGAATCTAATGCAAGCTACGGGAGAAAGAAACTCAAGTGCAAATGCTGCCTCAGGAATAAACGTAAAAAGAGACTTTCAAGTGCAAATGCTCCCTCAGGAATAAAATAATCTTGAGACTCTCAAGTGTAAATGCTGCCTCGGGAGAACCGAACGGCGAGCTGGAGCCCATACGCAACGAGATTAGAGAGGAAGGCAGAAGCCAGAGCACATGAATAAATGAGCATCCATTTTGTTTCAGAAATGATCGGAAACCATTTGTGGGTTTGTAGAAGCAGGCATGCGTAGGGAAGCTACGGGATTCCGCCGAGGAGCGCCAGAGCCTGAGGCGCCCTTTGGTTATCGCAAGCTGGCTGGCTCACTCCGCACCAGGTGCAAAAGATGCCTGGGGATGCGGGAAGGGAAAGGCCACATCTTCACGCCTTCGCGCCTGGCATTGTGAGCAACCACTGAGACTCATTATATAACACTCGTTTTCTTCTTGCAACCCTGCGGGCCGCGCGGTCGCGCTTTCTCTGCCCTCCGCCGGGTGGACCTGGAGCGCTTGAGCGGTCGGCGCGCCTGGAGCAGCCAGGCGGGCAGTGGACTAGCTGCTGGACCAGGGAGGTGTGGGAGAGCGGTGGCGGCGGGTACATGCACGTGAAGCCATTGCGAGAACTTTATCCATAAGTATTTCAATGCCGGTAGGGACGGCAAGAGAGGAGGGCGGGATGTGCCACACATCTTTGACCTCAGGTTTCTAACGCCTGTTTTCTTTCTGCCCTCTGCAGACATCCCCGATTGAAAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTCA \ No newline at end of file diff --git a/tests/data/ENST00000304494.tark.model.json b/tests/data/ENST00000304494.tark.model.json new file mode 100644 index 0000000..83ba9e6 --- /dev/null +++ b/tests/data/ENST00000304494.tark.model.json @@ -0,0 +1,133 @@ +{ + "annotations": { + "id": "9", + "type": "record", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 21967751 + }, + "end": { + "type": "point", + "position": 21974857 + } + }, + "features": [ + { + "type": "gene", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 21967751 + }, + "end": { + "type": "point", + "position": 21995301 + }, + "strand": -1 + }, + "id": "ENSG00000147889", + "qualifiers": { + "name": "CDKN2A", + "assembly_name": "GRCh38", + "version": "18" + }, + "features": [ + { + "type": "mRNA", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 21967751 + }, + "end": { + "type": "point", + "position": 21974857 + }, + "strand": -1 + }, + "id": "ENST00000304494", + "qualifiers": { + "assembly_name": "GRCh38", + "tag": "basic", + "version": "10" + }, + "features": [ + { + "type": "exon", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 21974677 + }, + "end": { + "type": "point", + "position": 21974857 + }, + "strand": -1 + }, + "id": "ENSE00001833804" + }, + { + "type": "exon", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 21970901 + }, + "end": { + "type": "point", + "position": 21971208 + }, + "strand": -1 + }, + "id": "ENSE00003496053" + }, + { + "type": "exon", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 21967751 + }, + "end": { + "type": "point", + "position": 21968242 + }, + "strand": -1 + }, + "id": "ENSE00003529527" + }, + { + "type": "CDS", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 21968228 + }, + "end": { + "type": "point", + "position": 21974827 + }, + "strand": -1 + }, + "id": "ENSP00000307101" + } + ] + } + ] + } + ] + }, + "sequence": { + "seq": "GAGAGCAGGCAGCGGGCGGCGGGGAGCAGCATGGAGCCGGCGGCGGGGAGCAGCATGGAGCCTTCGGCTGACTGGCTGGCCACGGCCGCGGCCCGGGGTCGGGTAGAGGAGGTGCGGGCGCTGCTGGAGGCGGGGGCGCTGCCCAACGCACCGAATAGTTACGGTCGGAGGCCGATCCAGGTGGGTAGAGGGTCTGCAGCGGGAGCAGGGGATGGCGGGCGACTCTGGAGGACGAAGTTTGCAGGGGAATTGGAATCAGGTAGCGCTTCGATTCTCCGGAAAAAGGGGAGGCTTCCTGGGGAGTTTTCAGAAGGGGTTTGTAATCACAGACCTCCTCCTGGCGACGCCCTGGGGGCTTGGGAAGCCAAGGAAGAGGAATGAGGAGCCACGCGCGTACAGATCTCTCGAATGCTGAGAAGATCTGAAGGGGGGAACATATTTGTATTAGATGGAAGTATGCTCTTTATCAGATACAAAATTTACGAACGTTTGGGATAAAAAGGGAGTCTTAAAGAAATGTAAGATGTGCTGGGACTACTTAGCCTCCAATTCACAGATACCTGGATGGAGCTTATCTTTCTTACTAGGAGGGATTATCAGTGGAAATCTGTGGTGTATGTTGGAATAAATATCGAATATAAATTTTGATCGAAATTATTCAGAAGCGGCCGGGCGCGGTGCCTCACGCCTTGTAATCCCTTCACTTTGGGAGATCAAGGCGGGGGGAATCACCTGAGGTCGGGAGTTCGAGACCAGCCTGGCCAACAGGTGAAACCTCGCCTCTACTAAAAATACAAAAAGTAGCCGGGGGTGGTGGCAGGCGCCTGTAATCCCAGCTACTCGGGAGGTTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCTGAGGTTGTAGTGAACAGCGAGATGGAGCCACTTCACTCCAGCCTGGGTGACAGAGTGAGACTTTGTCGAAAGAAAGAAAGAGAGAAAGAGAGAGAGAAAAATTATTCAGAAGCAACTACATATTGTGTTTATTTTTAACTGAGTAGGGCAAATAAATATATGTTTGCTGTAGGAACTTAGGAAATAATGAGCCACATTCATGTGATCATTCCAGAGGTAATATGTAGTTACCATTTTGGGAATATCTGCTAACATTTTTGCTCTTTTACTATCTTTAGCTTACTTGATATAGTTTATTTGTGATAAGAGTTTTCAATTCCTCATTTTTGAACAGAGGTGTTTCTCCTCTCCCTACTCCTGTTTTGTGAGGGAGTTAGGGGAGGATTTAAAAGTAATTAATACATGGGTAACTTAGCATCTCTAAAATTTTGCCAACAGCTTGAACCCGGGAGTTTGGCTTTGTAGTCCTACAATATCTTAGAAGAGACCTTATTTGTTTAAAAACAAAAAGGAAAAAGAAAAGTGGATAGTTTTGACAATTTTTAATGGAGACGGGAGAAGAACATGTAGAAAAGGGGAAATGATGTTGGCTTAGAATCCTAACTACATTGGTGTTTAATATAGGAACATTTATTTATATAACATTTTAAAGTACTAAATTCATATTAGTATATTATCAAATGGATATATTATCAAATGGGTTTAAGCATCCTACACATTTTAATTCAATTGATTCATTTTCTTTTTGCTTTGGATTTCTATCATGATTTAAATATTTACATATGGGTTACTTTTTAGATTTTTCATACTATGAAATATAAGAAAAACCTTTAAGGCTAGTTTTATGACCAAGACGAAGGACTTCATTGAATACACAAAACAATAAATATACTGCAACATTTTGTCTTTCTTTTTGTAGCTGCAATTTGGTTTGCTTATACTTTCTCTTTGTCTCTTTGAAAACTGAGTCAGTTTCACTTTCTCAGGACAGGATTTAATAACCATAATATAATTTAGTATAATTCCTTGATTTAGGCAAATTATGCAATTTGTGTTTAGTATGAAATGTACCTAAAAATAAGTAACTCCTCTTTAACACCACCATCCTCAAACTAATATAACAAATAACAGTTATCCTAAAATAAATTGTCTACTTCCACCATGCAGCACTCAAATTTTAAGGTTGCTATGACTGCAGACAGTATTTTAAAATTCCTCTCTGGAAATGGCTTTGTTTCCAAGATGATTTAGGAACCAAAGAGGTGACCATCTCTTGTTTAATGAACTCTCAAATCATAAACCTGGGAAGTGTTTTAGTTTCCTACTGCTGCTGTTACAAATTATCACAAATGTGTTAGCTAAAACAAACACAAAATTATTATTTTACAGTTCTAGAGATCAGAAGTCAAAAATGGGTCCACAAGGTTTCATTCCTTTTGGAAACTCTAAGGGGCAATCTGTTTCCTTGTCTTTTCCAGCTTCTAGTGACCATCAAATTCCTTGGCTCATGGTCTCTGTATTTTCTCTGTGGCCTGTGCTTCCATTCTTGTATCTTCTCTCTGACTGTGACCCTCTAATAAAAACACTTGGGGTTATGTTGGGCCCACCCTGAAAATTCTGGATAATCTCCCTCAAGACCATTAATTAAATCACATCTGCAAAGCCTCTTTTGCCACATAAGTTAATGTATTAAAAGTTTTTGAGGATTAGGACATAGACATTGGGGGTGGGGGGGCATTATTCAGCCTACCACAGGAAGGAATTTTAGGGTTAATTAAACTAGCCTTCTTATTTTATACTTGAAGAAATTGAAGTTTTGGAATTGGAGAGCATTATGCTAAATGAAATAAGCCAAACACAGAAAGACAAATATCACATGTTCTCACTTATCTGTGAAATATAAAACAATTACATTCTTAGCAGTAAAGAGTAGAATGGTGGTTACTAGAGCTGGGGGGTGGGAGGAATGGGGAGATGGTAATCAAGATATAAAGCCTCAGTTAAGATGGGAGGAATAAGTTTGATTGTTTTTTTTGAGATGTGTTTCATAGCATGATGAATATAGCTAAATAGTAAATCCCAAATGCTCTCATTTGACAAAAATGTCAAATATTTGAGATGATGGATAGGTTACTTAGCTTGACTTAATAATTCCCCATTGTGTTCAAAGATCATAACTTCATATTGTACCACATAAATATATACAACTGTACTATCCCAATATATAATTTTAAAACTAATATAATGAAAAAGAAATTGAAGTTCAACATTCCCAGAAGCTAAGTGTAACTTAAAAGTTTTGTGAGAATTTGTTTTAACAAACAAACAAGTTTTCTCTTTTTAACAATTACCACATTCTGCGCTTGGATATACAGCAGTGAACAAAAAAAAAAAAAAAAATCTCCAGGCCTAACATAATTTCAGGAAGAAATTTCAGTAGTTGTATCTCAGGGGAAATACAGGAAGTTAGCCTGGAGTAAAAGTCAGTCTGTCCCTGCCCCTTTGCTATTTTGCCCGTGCCTCACAGTGCTCTCTGCCTGTGACGACAGCTCCGCAGAAGTTCGGAGGATATAATGGAATTCATTGTGTACTGAAGAATGGATAGAGAACTCAAGAAGGAAATTGGAAACTGGAAGCAAATGTAGGGGTAATTAGACACCTGGGGCTTGTGTGGGGGTCTGCTTGGCGGTGAGGGGGCTCTACACAAGCTTCCTTTCCGTCATGCCGGCCCCCACCCTGGCTCTGACCATTCTGTTCTCTCTGGCAGGTCATGATGATGGGCAGCGCCCGAGTGGCGGAGCTGCTGCTGCTCCACGGCGCGGAGCCCAACTGCGCCGACCCCGCCACTCTCACCCGACCCGTGCACGACGCTGCCCGGGAGGGCTTCCTGGACACGCTGGTGGTGCTGCACCGGGCCGGGGCGCGGCTGGACGTGCGCGATGCCTGGGGCCGTCTGCCCGTGGACCTGGCTGAGGAGCTGGGCCATCGCGATGTCGCACGGTACCTGCGCGCGGCTGCGGGGGGCACCAGAGGCAGTAACCATGCCCGCATAGATGCCGCGGAAGGTCCCTCAGGTGAGGACTGATGATCTGAGAATTTGTACCCTGAGAGCTTCCAAAGCTCAGAGCATTCATTTTCCAGCACAGAAAGTTCAGCCCGGGAGACCAGTCTCCGGTCTTGCCTCAGCTCACGCGCCAATCGGTGGGACGGCCTGAGTCTCCCTATCGCCCTGCCCCGCCAGGGCGGCAAATGGGAAATAATCCCGAAATGGACTTGCGCACGTGAAAGCCCATTTTGTACATTATACTTCCCAAAGCATACCACCACCCAAACACCTACCCTCTGCTAGTTCAAGGCCTAGACTGCGGAGCAATGAAGACTCAAGAGGCTAGAGGTCTAGTGCCCCCTCTTCCTCCAAACTAGGGCCAGTTGCATCCACTTACCAGGTCTGTTTCCTCATTTGCATACCAAGCTGGCTGGACCAACCTCAGGATTTCCAAACCCAATTGTGCGTGGCATCATCTGGAGATCTCTCGATCTCGGCTCTTCTGCACAACTCAACTAATCTGACCCTCCTCAGCTAATCTGACCCTCCGCTTTATGCGGTAGAGTTTTCCAGAGCTGCCCCAGGGGGTTCTGGGGACATCAGGACCAAGACTTCGCTGACCCTGGCAGTCTGTGCACCGGAGTTGGCTCCTTTCCCTCTTAAACTTGTGCAAGAGATCGCTGAGCGATGAAGGTAGAATTATGGTCCTCCTTGCCCTTGCCTTTCCTTTTTGTGATCTCAAAGCATCCTCCCTCCGCCCCCATTCCATGGCCCCAGTTCCCTACTCCCACAGCTGTCTGCTGAAACTGCCAACATTACTCAATTGTTTCTGGGGGGAGGAACATTTTTTTTTGAAACAAAATAGATATATGAAACAGTACACGGGAATTAACACGAATATTTAAGGTAAAACATGACCTTGAAGATTATGAAATCCATCTTATTTTGGCCCAGAACGGGGGCATTGGGCTCCTTGGGCCATAGGGGAGCTGGGGAGGACAGGGTGAAGAGTTAGCTCTAAGCCCTCTGCTTGGAGATGCTGTAAATACAGAACGCAAAATCACCTTCGAAGTTAAAGACGCGAAGTTCTTCTTTACTCGGCCCCTCCTCCCCTCCCCCCCGCCAATTCCCTCCAGTTACAGCTAGCATCCAGGTCCCGGGAGGTGAAGAAGGAGACTTCGGCTCCAGTTACAGCTAGCATCCGGGTCCCGATTTAGAAGGAGCTGCCAATTACAGCGCGGTTCCAGGGCTGAGCAAAAAGCCTGAGGAGCCAAGTGGGAGAGGGAGTAAAACTACTGAATTGGGCCACAAGCAAATGAATAAACTGAACGACTCTTAACCAAACCTAATATATTTAATCCAAACACACAAGTCTTTCATTTCTTCCCTCCTCCCTTCCTTCTCTTACTCCCCAACACCCCCTCTTCAAGCACAATTAATTATATGGTTAGATTCTACTGCGTGATCAGCCCTGTTCTAGGTGGTGGGCACGCCAAGGTGAATGAGACCAAACAAGAGTCTTGCCCTCATGGGGTTTACATTTGGAGACAGAGTCGATCTGTTGCCCAACCTGGAGTGCAGTGGCGCGATCACAGCTCACTGCAGCCTCAAACTCCCTGGCTCAAGGGGTTCTCCCACCTGAGCCTCCCGACTAGCTGGGACCACAGGTGCACGCCACGACGCCTGGGTTTGTTTGTTTGTTTAATAGAGACGAAGGTCTCACCATGTTATCTGGGCTCAAGCGATCATCCCCCCTCCTCCTCCTAAAGTACTGGGATTACAGTCCCAAGCTATCTTGCCCGACCTGGGAAACAGACGTTAAGGAAGATAACAATCTATTTTCAGAGAGCGAGTTTATAAAACCAATGCAATGGGTAAATATGAAGTGTGAATAGGAGGAGAAGCTAAAGAGTGGTCGGAGAATCTAATGCAAGCTACGGGAGAAAGAAACTCAAGTGCAAATGCTGCCTCAGGAATAAACGTAAAAAGAGACTTTCAAGTGCAAATGCTCCCTCAGGAATAAAATAATCTTGAGACTCTCAAGTGTAAATGCTGCCTCGGGAGAACCGAACGGCGAGCTGGAGCCCATACGCAACGAGATTAGAGAGGAAGGCAGAAGCCAGAGCACATGAATAAATGAGCATCCATTTTGTTTCAGAAATGATCGGAAACCATTTGTGGGTTTGTAGAAGCAGGCATGCGTAGGGAAGCTACGGGATTCCGCCGAGGAGCGCCAGAGCCTGAGGCGCCCTTTGGTTATCGCAAGCTGGCTGGCTCACTCCGCACCAGGTGCAAAAGATGCCTGGGGATGCGGGAAGGGAAAGGCCACATCTTCACGCCTTCGCGCCTGGCATTGTGAGCAACCACTGAGACTCATTATATAACACTCGTTTTCTTCTTGCAACCCTGCGGGCCGCGCGGTCGCGCTTTCTCTGCCCTCCGCCGGGTGGACCTGGAGCGCTTGAGCGGTCGGCGCGCCTGGAGCAGCCAGGCGGGCAGTGGACTAGCTGCTGGACCAGGGAGGTGTGGGAGAGCGGTGGCGGCGGGTACATGCACGTGAAGCCATTGCGAGAACTTTATCCATAAGTATTTCAATGCCGGTAGGGACGGCAAGAGAGGAGGGCGGGATGTGCCACACATCTTTGACCTCAGGTTTCTAACGCCTGTTTTCTTTCTGCCCTCTGCAGACATCCCCGATTGAAAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTCA", + "description": "ENST00000304494.10 chromosome:GRCh38:9:21967752:21974857:-1" + } +} \ No newline at end of file diff --git a/tests/data/ENST00000304494.tark_raw.json b/tests/data/ENST00000304494.tark_raw.json new file mode 100644 index 0000000..29ea71c --- /dev/null +++ b/tests/data/ENST00000304494.tark_raw.json @@ -0,0 +1,578 @@ +{ + "count": 5, + "next": null, + "previous": null, + "results": [ + { + "stable_id": "ENST00000304494", + "stable_id_version": 6, + "assembly": "GRCh38", + "loc_start": 21967753, + "loc_end": 21975098, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "EB92F4E6FD93748288E662923ED722E1DFD217CE", + "exon_set_checksum": "EA54C10367494ACFCE2C62B8AA9C769F786177BA", + "transcript_checksum": "A8DF4B069BC282AB3BD9A14469DEB9C9C2475D3F", + "sequence": { + "sequence": "CCCAACCTGGGGCGACTTCAGGGGTGCCACATTCGCTAAGTGCTCGGAGTTAATAGCACCTCCTCCGAGCACTCGCTCACGGCGTCCCCTTGCCTGGAAAGATACCGCGGTCCCTCCAGAGGATTTGAGGGACAGGGTCGGAGGGGGCTCTTCCGCCAGCACCGGAGGAAGAAAGAGGAGGGGCTGGCTGGTCACCAGAGGGTGGGGCGGACCGCGTGCGCTCGGCGGCTGCGGAGAGGGGGAGAGCAGGCAGCGGGCGGCGGGGAGCAGCATGGAGCCGGCGGCGGGGAGCAGCATGGAGCCTTCGGCTGACTGGCTGGCCACGGCCGCGGCCCGGGGTCGGGTAGAGGAGGTGCGGGCGCTGCTGGAGGCGGGGGCGCTGCCCAACGCACCGAATAGTTACGGTCGGAGGCCGATCCAGGTCATGATGATGGGCAGCGCCCGAGTGGCGGAGCTGCTGCTGCTCCACGGCGCGGAGCCCAACTGCGCCGACCCCGCCACTCTCACCCGACCCGTGCACGACGCTGCCCGGGAGGGCTTCCTGGACACGCTGGTGGTGCTGCACCGGGCCGGGGCGCGGCTGGACGTGCGCGATGCCTGGGGCCGTCTGCCCGTGGACCTGGCTGAGGAGCTGGGCCATCGCGATGTCGCACGGTACCTGCGCGCGGCTGCGGGGGGCACCAGAGGCAGTAACCATGCCCGCATAGATGCCGCGGAAGGTCCCTCAGACATCCCCGATTGAAAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTC", + "seq_checksum": "333B6F94ADA93A896B846BC5C7AEB96E94820586" + }, + "biotype": "protein_coding", + "three_prime_utr_start": 21968228, + "three_prime_utr_end": 21967753, + "three_prime_utr_seq": "AAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTC", + "three_prime_utr_checksum": "70052E171F9EE5D232FF603466B86D58860234A6", + "five_prime_utr_start": 21975098, + "five_prime_utr_end": 21974828, + "five_prime_utr_seq": "CCCAACCTGGGGCGACTTCAGGGGTGCCACATTCGCTAAGTGCTCGGAGTTAATAGCACCTCCTCCGAGCACTCGCTCACGGCGTCCCCTTGCCTGGAAAGATACCGCGGTCCCTCCAGAGGATTTGAGGGACAGGGTCGGAGGGGGCTCTTCCGCCAGCACCGGAGGAAGAAAGAGGAGGGGCTGGCTGGTCACCAGAGGGTGGGGCGGACCGCGTGCGCTCGGCGGCTGCGGAGAGGGGGAGAGCAGGCAGCGGGCGGCGGGGAGCAGC", + "five_prime_utr_checksum": "DFA62E34AC3102492C6D9F0FB409A82BD35936D9", + "translations": [ + { + "stable_id": "ENSP00000307101", + "stable_id_version": 5, + "assembly": "GRCh38", + "loc_start": 21968229, + "loc_end": 21974827, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "105755D5C4F7A1A01F50DDF4A451CEE8004002A2", + "translation_id": 176189, + "translation_checksum": "1C48A45C9C622B7CFD8CD09854FF8D7DBD3D1BB2" + } + ], + "genes": [ + { + "stable_id": "ENSG00000147889", + "stable_id_version": 13, + "assembly": "GRCh38", + "loc_start": 21967753, + "loc_end": 21995301, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "B3E7AB60256CD76F74C1C722A11970E2F7703772", + "name": "CDKN2A", + "gene_checksum": "B4D05BCE038A7C06BF4FCC6C9059EF16D3FACC67" + } + ], + "exons": [ + { + "exon_id": 2185905, + "stable_id": "ENSE00001833804", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 21974678, + "loc_end": 21975098, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "EE821B3A3ED8D819F162D8D69EF6A075A838360C", + "exon_checksum": "C390D4A5F64CD3AB88BEAA1BD3830EA847A60726", + "exon_order": 1 + }, + { + "exon_id": 2185928, + "stable_id": "ENSE00003496053", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 21970902, + "loc_end": 21971208, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "4F25E374539A33AA0B4AD698A0011A58DC547920", + "exon_checksum": "15440459F0F3192E523F6122E233671C0D5B5466", + "exon_order": 2 + }, + { + "exon_id": 2185952, + "stable_id": "ENSE00003529527", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 21967753, + "loc_end": 21968242, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "D4F0BD05A559F79A9EB1822B6292BA756AF0C109", + "exon_checksum": "56E5BF98EFE9F3A667B8E34C15D1EB420267ABE0", + "exon_order": 3 + } + ], + "cds_info": { + "translation_start": 21968229, + "translation_end": 21974827, + "three_prime_utr_start": 21968228, + "three_prime_utr_end": 21967753, + "three_prime_utr_seq": "AAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTC", + "three_prime_utr_length": 476, + "five_prime_utr_start": 21975098, + "five_prime_utr_end": 21974828, + "five_prime_utr_seq": "CCCAACCTGGGGCGACTTCAGGGGTGCCACATTCGCTAAGTGCTCGGAGTTAATAGCACCTCCTCCGAGCACTCGCTCACGGCGTCCCCTTGCCTGGAAAGATACCGCGGTCCCTCCAGAGGATTTGAGGGACAGGGTCGGAGGGGGCTCTTCCGCCAGCACCGGAGGAAGAAAGAGGAGGGGCTGGCTGGTCACCAGAGGGTGGGGCGGACCGCGTGCGCTCGGCGGCTGCGGAGAGGGGGAGAGCAGGCAGCGGGCGGCGGGGAGCAGC", + "five_prime_utr_length": 271, + "loc_region": "9", + "loc_strand": -1, + "cds_seq": "ATGGAGCCGGCGGCGGGGAGCAGCATGGAGCCTTCGGCTGACTGGCTGGCCACGGCCGCGGCCCGGGGTCGGGTAGAGGAGGTGCGGGCGCTGCTGGAGGCGGGGGCGCTGCCCAACGCACCGAATAGTTACGGTCGGAGGCCGATCCAGGTCATGATGATGGGCAGCGCCCGAGTGGCGGAGCTGCTGCTGCTCCACGGCGCGGAGCCCAACTGCGCCGACCCCGCCACTCTCACCCGACCCGTGCACGACGCTGCCCGGGAGGGCTTCCTGGACACGCTGGTGGTGCTGCACCGGGCCGGGGCGCGGCTGGACGTGCGCGATGCCTGGGGCCGTCTGCCCGTGGACCTGGCTGAGGAGCTGGGCCATCGCGATGTCGCACGGTACCTGCGCGCGGCTGCGGGGGGCACCAGAGGCAGTAACCATGCCCGCATAGATGCCGCGGAAGGTCCCTCAGACATCCCCGATTGA" + } + }, + { + "stable_id": "ENST00000304494", + "stable_id_version": 7, + "assembly": "GRCh38", + "loc_start": 21967753, + "loc_end": 21975098, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "EB92F4E6FD93748288E662923ED722E1DFD217CE", + "exon_set_checksum": "EA54C10367494ACFCE2C62B8AA9C769F786177BA", + "transcript_checksum": "D89AED3D3AB6D7B76DD7DEC4F551DA6D44396E55", + "sequence": { + "sequence": "CCCAACCTGGGGCGACTTCAGGGGTGCCACATTCGCTAAGTGCTCGGAGTTAATAGCACCTCCTCCGAGCACTCGCTCACGGCGTCCCCTTGCCTGGAAAGATACCGCGGTCCCTCCAGAGGATTTGAGGGACAGGGTCGGAGGGGGCTCTTCCGCCAGCACCGGAGGAAGAAAGAGGAGGGGCTGGCTGGTCACCAGAGGGTGGGGCGGACCGCGTGCGCTCGGCGGCTGCGGAGAGGGGGAGAGCAGGCAGCGGGCGGCGGGGAGCAGCATGGAGCCGGCGGCGGGGAGCAGCATGGAGCCTTCGGCTGACTGGCTGGCCACGGCCGCGGCCCGGGGTCGGGTAGAGGAGGTGCGGGCGCTGCTGGAGGCGGGGGCGCTGCCCAACGCACCGAATAGTTACGGTCGGAGGCCGATCCAGGTCATGATGATGGGCAGCGCCCGAGTGGCGGAGCTGCTGCTGCTCCACGGCGCGGAGCCCAACTGCGCCGACCCCGCCACTCTCACCCGACCCGTGCACGACGCTGCCCGGGAGGGCTTCCTGGACACGCTGGTGGTGCTGCACCGGGCCGGGGCGCGGCTGGACGTGCGCGATGCCTGGGGCCGTCTGCCCGTGGACCTGGCTGAGGAGCTGGGCCATCGCGATGTCGCACGGTACCTGCGCGCGGCTGCGGGGGGCACCAGAGGCAGTAACCATGCCCGCATAGATGCCGCGGAAGGTCCCTCAGACATCCCCGATTGAAAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTC", + "seq_checksum": "333B6F94ADA93A896B846BC5C7AEB96E94820586" + }, + "biotype": "protein_coding", + "three_prime_utr_start": 21968228, + "three_prime_utr_end": 21967753, + "three_prime_utr_seq": "AAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTC", + "three_prime_utr_checksum": "70052E171F9EE5D232FF603466B86D58860234A6", + "five_prime_utr_start": 21975098, + "five_prime_utr_end": 21974828, + "five_prime_utr_seq": "CCCAACCTGGGGCGACTTCAGGGGTGCCACATTCGCTAAGTGCTCGGAGTTAATAGCACCTCCTCCGAGCACTCGCTCACGGCGTCCCCTTGCCTGGAAAGATACCGCGGTCCCTCCAGAGGATTTGAGGGACAGGGTCGGAGGGGGCTCTTCCGCCAGCACCGGAGGAAGAAAGAGGAGGGGCTGGCTGGTCACCAGAGGGTGGGGCGGACCGCGTGCGCTCGGCGGCTGCGGAGAGGGGGAGAGCAGGCAGCGGGCGGCGGGGAGCAGC", + "five_prime_utr_checksum": "DFA62E34AC3102492C6D9F0FB409A82BD35936D9", + "translations": [ + { + "stable_id": "ENSP00000307101", + "stable_id_version": 5, + "assembly": "GRCh38", + "loc_start": 21968229, + "loc_end": 21974827, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "105755D5C4F7A1A01F50DDF4A451CEE8004002A2", + "translation_id": 176189, + "translation_checksum": "1C48A45C9C622B7CFD8CD09854FF8D7DBD3D1BB2" + } + ], + "genes": [ + { + "stable_id": "ENSG00000147889", + "stable_id_version": 14, + "assembly": "GRCh38", + "loc_start": 21967753, + "loc_end": 21995301, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "B3E7AB60256CD76F74C1C722A11970E2F7703772", + "name": "CDKN2A", + "gene_checksum": "8012D9147D7D0119B3DD16952CED05AA878A6E25" + } + ], + "exons": [ + { + "exon_id": 2185905, + "stable_id": "ENSE00001833804", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 21974678, + "loc_end": 21975098, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "EE821B3A3ED8D819F162D8D69EF6A075A838360C", + "exon_checksum": "C390D4A5F64CD3AB88BEAA1BD3830EA847A60726", + "exon_order": 1 + }, + { + "exon_id": 2185928, + "stable_id": "ENSE00003496053", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 21970902, + "loc_end": 21971208, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "4F25E374539A33AA0B4AD698A0011A58DC547920", + "exon_checksum": "15440459F0F3192E523F6122E233671C0D5B5466", + "exon_order": 2 + }, + { + "exon_id": 2185952, + "stable_id": "ENSE00003529527", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 21967753, + "loc_end": 21968242, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "D4F0BD05A559F79A9EB1822B6292BA756AF0C109", + "exon_checksum": "56E5BF98EFE9F3A667B8E34C15D1EB420267ABE0", + "exon_order": 3 + } + ], + "cds_info": { + "translation_start": 21968229, + "translation_end": 21974827, + "three_prime_utr_start": 21968228, + "three_prime_utr_end": 21967753, + "three_prime_utr_seq": "AAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTC", + "three_prime_utr_length": 476, + "five_prime_utr_start": 21975098, + "five_prime_utr_end": 21974828, + "five_prime_utr_seq": "CCCAACCTGGGGCGACTTCAGGGGTGCCACATTCGCTAAGTGCTCGGAGTTAATAGCACCTCCTCCGAGCACTCGCTCACGGCGTCCCCTTGCCTGGAAAGATACCGCGGTCCCTCCAGAGGATTTGAGGGACAGGGTCGGAGGGGGCTCTTCCGCCAGCACCGGAGGAAGAAAGAGGAGGGGCTGGCTGGTCACCAGAGGGTGGGGCGGACCGCGTGCGCTCGGCGGCTGCGGAGAGGGGGAGAGCAGGCAGCGGGCGGCGGGGAGCAGC", + "five_prime_utr_length": 271, + "loc_region": "9", + "loc_strand": -1, + "cds_seq": "ATGGAGCCGGCGGCGGGGAGCAGCATGGAGCCTTCGGCTGACTGGCTGGCCACGGCCGCGGCCCGGGGTCGGGTAGAGGAGGTGCGGGCGCTGCTGGAGGCGGGGGCGCTGCCCAACGCACCGAATAGTTACGGTCGGAGGCCGATCCAGGTCATGATGATGGGCAGCGCCCGAGTGGCGGAGCTGCTGCTGCTCCACGGCGCGGAGCCCAACTGCGCCGACCCCGCCACTCTCACCCGACCCGTGCACGACGCTGCCCGGGAGGGCTTCCTGGACACGCTGGTGGTGCTGCACCGGGCCGGGGCGCGGCTGGACGTGCGCGATGCCTGGGGCCGTCTGCCCGTGGACCTGGCTGAGGAGCTGGGCCATCGCGATGTCGCACGGTACCTGCGCGCGGCTGCGGGGGGCACCAGAGGCAGTAACCATGCCCGCATAGATGCCGCGGAAGGTCCCTCAGACATCCCCGATTGA" + } + }, + { + "stable_id": "ENST00000304494", + "stable_id_version": 8, + "assembly": "GRCh38", + "loc_start": 21967753, + "loc_end": 21975098, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "EB92F4E6FD93748288E662923ED722E1DFD217CE", + "exon_set_checksum": "EA54C10367494ACFCE2C62B8AA9C769F786177BA", + "transcript_checksum": "E1EBDFB807CF9E42B9F9F0E955A56D6BE3498DB2", + "sequence": { + "sequence": "CCCAACCTGGGGCGACTTCAGGGGTGCCACATTCGCTAAGTGCTCGGAGTTAATAGCACCTCCTCCGAGCACTCGCTCACGGCGTCCCCTTGCCTGGAAAGATACCGCGGTCCCTCCAGAGGATTTGAGGGACAGGGTCGGAGGGGGCTCTTCCGCCAGCACCGGAGGAAGAAAGAGGAGGGGCTGGCTGGTCACCAGAGGGTGGGGCGGACCGCGTGCGCTCGGCGGCTGCGGAGAGGGGGAGAGCAGGCAGCGGGCGGCGGGGAGCAGCATGGAGCCGGCGGCGGGGAGCAGCATGGAGCCTTCGGCTGACTGGCTGGCCACGGCCGCGGCCCGGGGTCGGGTAGAGGAGGTGCGGGCGCTGCTGGAGGCGGGGGCGCTGCCCAACGCACCGAATAGTTACGGTCGGAGGCCGATCCAGGTCATGATGATGGGCAGCGCCCGAGTGGCGGAGCTGCTGCTGCTCCACGGCGCGGAGCCCAACTGCGCCGACCCCGCCACTCTCACCCGACCCGTGCACGACGCTGCCCGGGAGGGCTTCCTGGACACGCTGGTGGTGCTGCACCGGGCCGGGGCGCGGCTGGACGTGCGCGATGCCTGGGGCCGTCTGCCCGTGGACCTGGCTGAGGAGCTGGGCCATCGCGATGTCGCACGGTACCTGCGCGCGGCTGCGGGGGGCACCAGAGGCAGTAACCATGCCCGCATAGATGCCGCGGAAGGTCCCTCAGACATCCCCGATTGAAAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTC", + "seq_checksum": "333B6F94ADA93A896B846BC5C7AEB96E94820586" + }, + "biotype": "protein_coding", + "three_prime_utr_start": 21968228, + "three_prime_utr_end": 21967753, + "three_prime_utr_seq": "AAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTC", + "three_prime_utr_checksum": "70052E171F9EE5D232FF603466B86D58860234A6", + "five_prime_utr_start": 21975098, + "five_prime_utr_end": 21974828, + "five_prime_utr_seq": "CCCAACCTGGGGCGACTTCAGGGGTGCCACATTCGCTAAGTGCTCGGAGTTAATAGCACCTCCTCCGAGCACTCGCTCACGGCGTCCCCTTGCCTGGAAAGATACCGCGGTCCCTCCAGAGGATTTGAGGGACAGGGTCGGAGGGGGCTCTTCCGCCAGCACCGGAGGAAGAAAGAGGAGGGGCTGGCTGGTCACCAGAGGGTGGGGCGGACCGCGTGCGCTCGGCGGCTGCGGAGAGGGGGAGAGCAGGCAGCGGGCGGCGGGGAGCAGC", + "five_prime_utr_checksum": "DFA62E34AC3102492C6D9F0FB409A82BD35936D9", + "translations": [ + { + "stable_id": "ENSP00000307101", + "stable_id_version": 5, + "assembly": "GRCh38", + "loc_start": 21968229, + "loc_end": 21974827, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "105755D5C4F7A1A01F50DDF4A451CEE8004002A2", + "translation_id": 176189, + "translation_checksum": "1C48A45C9C622B7CFD8CD09854FF8D7DBD3D1BB2" + } + ], + "genes": [ + { + "stable_id": "ENSG00000147889", + "stable_id_version": 15, + "assembly": "GRCh38", + "loc_start": 21967753, + "loc_end": 21995301, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "B3E7AB60256CD76F74C1C722A11970E2F7703772", + "name": "CDKN2A", + "gene_checksum": "A578BC8DA1AB7AAD2230129DA297597F214EEF81" + } + ], + "exons": [ + { + "exon_id": 2185905, + "stable_id": "ENSE00001833804", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 21974678, + "loc_end": 21975098, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "EE821B3A3ED8D819F162D8D69EF6A075A838360C", + "exon_checksum": "C390D4A5F64CD3AB88BEAA1BD3830EA847A60726", + "exon_order": 1 + }, + { + "exon_id": 2185928, + "stable_id": "ENSE00003496053", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 21970902, + "loc_end": 21971208, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "4F25E374539A33AA0B4AD698A0011A58DC547920", + "exon_checksum": "15440459F0F3192E523F6122E233671C0D5B5466", + "exon_order": 2 + }, + { + "exon_id": 2185952, + "stable_id": "ENSE00003529527", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 21967753, + "loc_end": 21968242, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "D4F0BD05A559F79A9EB1822B6292BA756AF0C109", + "exon_checksum": "56E5BF98EFE9F3A667B8E34C15D1EB420267ABE0", + "exon_order": 3 + } + ], + "cds_info": { + "translation_start": 21968229, + "translation_end": 21974827, + "three_prime_utr_start": 21968228, + "three_prime_utr_end": 21967753, + "three_prime_utr_seq": "AAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTC", + "three_prime_utr_length": 476, + "five_prime_utr_start": 21975098, + "five_prime_utr_end": 21974828, + "five_prime_utr_seq": "CCCAACCTGGGGCGACTTCAGGGGTGCCACATTCGCTAAGTGCTCGGAGTTAATAGCACCTCCTCCGAGCACTCGCTCACGGCGTCCCCTTGCCTGGAAAGATACCGCGGTCCCTCCAGAGGATTTGAGGGACAGGGTCGGAGGGGGCTCTTCCGCCAGCACCGGAGGAAGAAAGAGGAGGGGCTGGCTGGTCACCAGAGGGTGGGGCGGACCGCGTGCGCTCGGCGGCTGCGGAGAGGGGGAGAGCAGGCAGCGGGCGGCGGGGAGCAGC", + "five_prime_utr_length": 271, + "loc_region": "9", + "loc_strand": -1, + "cds_seq": "ATGGAGCCGGCGGCGGGGAGCAGCATGGAGCCTTCGGCTGACTGGCTGGCCACGGCCGCGGCCCGGGGTCGGGTAGAGGAGGTGCGGGCGCTGCTGGAGGCGGGGGCGCTGCCCAACGCACCGAATAGTTACGGTCGGAGGCCGATCCAGGTCATGATGATGGGCAGCGCCCGAGTGGCGGAGCTGCTGCTGCTCCACGGCGCGGAGCCCAACTGCGCCGACCCCGCCACTCTCACCCGACCCGTGCACGACGCTGCCCGGGAGGGCTTCCTGGACACGCTGGTGGTGCTGCACCGGGCCGGGGCGCGGCTGGACGTGCGCGATGCCTGGGGCCGTCTGCCCGTGGACCTGGCTGAGGAGCTGGGCCATCGCGATGTCGCACGGTACCTGCGCGCGGCTGCGGGGGGCACCAGAGGCAGTAACCATGCCCGCATAGATGCCGCGGAAGGTCCCTCAGACATCCCCGATTGA" + } + }, + { + "stable_id": "ENST00000304494", + "stable_id_version": 9, + "assembly": "GRCh38", + "loc_start": 21967753, + "loc_end": 21975098, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "EB92F4E6FD93748288E662923ED722E1DFD217CE", + "exon_set_checksum": "EA54C10367494ACFCE2C62B8AA9C769F786177BA", + "transcript_checksum": "628671997542287CC1B86771204208D4A4D1A973", + "sequence": { + "sequence": "CCCAACCTGGGGCGACTTCAGGGGTGCCACATTCGCTAAGTGCTCGGAGTTAATAGCACCTCCTCCGAGCACTCGCTCACGGCGTCCCCTTGCCTGGAAAGATACCGCGGTCCCTCCAGAGGATTTGAGGGACAGGGTCGGAGGGGGCTCTTCCGCCAGCACCGGAGGAAGAAAGAGGAGGGGCTGGCTGGTCACCAGAGGGTGGGGCGGACCGCGTGCGCTCGGCGGCTGCGGAGAGGGGGAGAGCAGGCAGCGGGCGGCGGGGAGCAGCATGGAGCCGGCGGCGGGGAGCAGCATGGAGCCTTCGGCTGACTGGCTGGCCACGGCCGCGGCCCGGGGTCGGGTAGAGGAGGTGCGGGCGCTGCTGGAGGCGGGGGCGCTGCCCAACGCACCGAATAGTTACGGTCGGAGGCCGATCCAGGTCATGATGATGGGCAGCGCCCGAGTGGCGGAGCTGCTGCTGCTCCACGGCGCGGAGCCCAACTGCGCCGACCCCGCCACTCTCACCCGACCCGTGCACGACGCTGCCCGGGAGGGCTTCCTGGACACGCTGGTGGTGCTGCACCGGGCCGGGGCGCGGCTGGACGTGCGCGATGCCTGGGGCCGTCTGCCCGTGGACCTGGCTGAGGAGCTGGGCCATCGCGATGTCGCACGGTACCTGCGCGCGGCTGCGGGGGGCACCAGAGGCAGTAACCATGCCCGCATAGATGCCGCGGAAGGTCCCTCAGACATCCCCGATTGAAAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTC", + "seq_checksum": "333B6F94ADA93A896B846BC5C7AEB96E94820586" + }, + "biotype": "protein_coding", + "three_prime_utr_start": 21968228, + "three_prime_utr_end": 21967753, + "three_prime_utr_seq": "AAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTC", + "three_prime_utr_checksum": "70052E171F9EE5D232FF603466B86D58860234A6", + "five_prime_utr_start": 21975098, + "five_prime_utr_end": 21974828, + "five_prime_utr_seq": "CCCAACCTGGGGCGACTTCAGGGGTGCCACATTCGCTAAGTGCTCGGAGTTAATAGCACCTCCTCCGAGCACTCGCTCACGGCGTCCCCTTGCCTGGAAAGATACCGCGGTCCCTCCAGAGGATTTGAGGGACAGGGTCGGAGGGGGCTCTTCCGCCAGCACCGGAGGAAGAAAGAGGAGGGGCTGGCTGGTCACCAGAGGGTGGGGCGGACCGCGTGCGCTCGGCGGCTGCGGAGAGGGGGAGAGCAGGCAGCGGGCGGCGGGGAGCAGC", + "five_prime_utr_checksum": "DFA62E34AC3102492C6D9F0FB409A82BD35936D9", + "translations": [ + { + "stable_id": "ENSP00000307101", + "stable_id_version": 5, + "assembly": "GRCh38", + "loc_start": 21968229, + "loc_end": 21974827, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "105755D5C4F7A1A01F50DDF4A451CEE8004002A2", + "translation_id": 176189, + "translation_checksum": "1C48A45C9C622B7CFD8CD09854FF8D7DBD3D1BB2" + } + ], + "genes": [ + { + "stable_id": "ENSG00000147889", + "stable_id_version": 16, + "assembly": "GRCh38", + "loc_start": 21967753, + "loc_end": 21995301, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "B3E7AB60256CD76F74C1C722A11970E2F7703772", + "name": "CDKN2A", + "gene_checksum": "CBE2ED5AA28824986FC227972982D57DC281B899" + }, + { + "stable_id": "ENSG00000147889", + "stable_id_version": 17, + "assembly": "GRCh38", + "loc_start": 21967753, + "loc_end": 21995301, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "B3E7AB60256CD76F74C1C722A11970E2F7703772", + "name": "CDKN2A", + "gene_checksum": "0A801F52D0D8036411E4D603DA189AB0CD5FDDB0" + }, + { + "stable_id": "ENSG00000147889", + "stable_id_version": 17, + "assembly": "GRCh38", + "loc_start": 21967753, + "loc_end": 21995301, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "B3E7AB60256CD76F74C1C722A11970E2F7703772", + "name": null, + "gene_checksum": "CDA1742E54A0A13D1B746B9A089E200D0D654A1E" + } + ], + "exons": [ + { + "exon_id": 2185905, + "stable_id": "ENSE00001833804", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 21974678, + "loc_end": 21975098, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "EE821B3A3ED8D819F162D8D69EF6A075A838360C", + "exon_checksum": "C390D4A5F64CD3AB88BEAA1BD3830EA847A60726", + "exon_order": 1 + }, + { + "exon_id": 2185928, + "stable_id": "ENSE00003496053", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 21970902, + "loc_end": 21971208, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "4F25E374539A33AA0B4AD698A0011A58DC547920", + "exon_checksum": "15440459F0F3192E523F6122E233671C0D5B5466", + "exon_order": 2 + }, + { + "exon_id": 2185952, + "stable_id": "ENSE00003529527", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 21967753, + "loc_end": 21968242, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "D4F0BD05A559F79A9EB1822B6292BA756AF0C109", + "exon_checksum": "56E5BF98EFE9F3A667B8E34C15D1EB420267ABE0", + "exon_order": 3 + } + ], + "cds_info": { + "translation_start": 21968229, + "translation_end": 21974827, + "three_prime_utr_start": 21968228, + "three_prime_utr_end": 21967753, + "three_prime_utr_seq": "AAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTC", + "three_prime_utr_length": 476, + "five_prime_utr_start": 21975098, + "five_prime_utr_end": 21974828, + "five_prime_utr_seq": "CCCAACCTGGGGCGACTTCAGGGGTGCCACATTCGCTAAGTGCTCGGAGTTAATAGCACCTCCTCCGAGCACTCGCTCACGGCGTCCCCTTGCCTGGAAAGATACCGCGGTCCCTCCAGAGGATTTGAGGGACAGGGTCGGAGGGGGCTCTTCCGCCAGCACCGGAGGAAGAAAGAGGAGGGGCTGGCTGGTCACCAGAGGGTGGGGCGGACCGCGTGCGCTCGGCGGCTGCGGAGAGGGGGAGAGCAGGCAGCGGGCGGCGGGGAGCAGC", + "five_prime_utr_length": 271, + "loc_region": "9", + "loc_strand": -1, + "cds_seq": "ATGGAGCCGGCGGCGGGGAGCAGCATGGAGCCTTCGGCTGACTGGCTGGCCACGGCCGCGGCCCGGGGTCGGGTAGAGGAGGTGCGGGCGCTGCTGGAGGCGGGGGCGCTGCCCAACGCACCGAATAGTTACGGTCGGAGGCCGATCCAGGTCATGATGATGGGCAGCGCCCGAGTGGCGGAGCTGCTGCTGCTCCACGGCGCGGAGCCCAACTGCGCCGACCCCGCCACTCTCACCCGACCCGTGCACGACGCTGCCCGGGAGGGCTTCCTGGACACGCTGGTGGTGCTGCACCGGGCCGGGGCGCGGCTGGACGTGCGCGATGCCTGGGGCCGTCTGCCCGTGGACCTGGCTGAGGAGCTGGGCCATCGCGATGTCGCACGGTACCTGCGCGCGGCTGCGGGGGGCACCAGAGGCAGTAACCATGCCCGCATAGATGCCGCGGAAGGTCCCTCAGACATCCCCGATTGA" + } + }, + { + "stable_id": "ENST00000304494", + "stable_id_version": 10, + "assembly": "GRCh38", + "loc_start": 21967752, + "loc_end": 21974857, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "F8C16E95E11B4DBAD97B78D927A868E7D1C8F08E", + "exon_set_checksum": "C0DB9C7510347D5058F8D6A0103B25CEB7A5B7F2", + "transcript_checksum": "C744046AE49A2A1FCAFE45B5E1DB219BD8D1E710", + "sequence": { + "sequence": "GAGAGCAGGCAGCGGGCGGCGGGGAGCAGCATGGAGCCGGCGGCGGGGAGCAGCATGGAGCCTTCGGCTGACTGGCTGGCCACGGCCGCGGCCCGGGGTCGGGTAGAGGAGGTGCGGGCGCTGCTGGAGGCGGGGGCGCTGCCCAACGCACCGAATAGTTACGGTCGGAGGCCGATCCAGGTCATGATGATGGGCAGCGCCCGAGTGGCGGAGCTGCTGCTGCTCCACGGCGCGGAGCCCAACTGCGCCGACCCCGCCACTCTCACCCGACCCGTGCACGACGCTGCCCGGGAGGGCTTCCTGGACACGCTGGTGGTGCTGCACCGGGCCGGGGCGCGGCTGGACGTGCGCGATGCCTGGGGCCGTCTGCCCGTGGACCTGGCTGAGGAGCTGGGCCATCGCGATGTCGCACGGTACCTGCGCGCGGCTGCGGGGGGCACCAGAGGCAGTAACCATGCCCGCATAGATGCCGCGGAAGGTCCCTCAGACATCCCCGATTGAAAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTCA", + "seq_checksum": "790809E0CD049743CFC6338048865F256AA64D3F" + }, + "biotype": "protein_coding", + "three_prime_utr_start": 21968228, + "three_prime_utr_end": 21967752, + "three_prime_utr_seq": "AAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTCA", + "three_prime_utr_checksum": "04EBC613707A51244E4725491FF9A67F674274E3", + "five_prime_utr_start": 21974857, + "five_prime_utr_end": 21974828, + "five_prime_utr_seq": "GAGAGCAGGCAGCGGGCGGCGGGGAGCAGC", + "five_prime_utr_checksum": "33BCE24430AB5E5064811BCAF5AFFB9DD9FB1620", + "translations": [ + { + "stable_id": "ENSP00000307101", + "stable_id_version": 5, + "assembly": "GRCh38", + "loc_start": 21968229, + "loc_end": 21974827, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "105755D5C4F7A1A01F50DDF4A451CEE8004002A2", + "translation_id": 176189, + "translation_checksum": "1C48A45C9C622B7CFD8CD09854FF8D7DBD3D1BB2" + } + ], + "genes": [ + { + "stable_id": "ENSG00000147889", + "stable_id_version": 18, + "assembly": "GRCh38", + "loc_start": 21967752, + "loc_end": 21995301, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "ED15957CEA6CCF777595B51C1CB825B646B57347", + "name": "CDKN2A", + "gene_checksum": "AEEBF97735FA3B5F87BE93069A64D1C96734A339" + } + ], + "exons": [ + { + "exon_id": 57506579, + "stable_id": "ENSE00001833804", + "stable_id_version": 2, + "assembly": "GRCh38", + "loc_start": 21974678, + "loc_end": 21974857, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "8784F1FDE2A72E091CBD1198F21D74B2A43E28B7", + "exon_checksum": "11B77CAE42B359F07D425E90D7B2958419F6009D", + "exon_order": 1 + }, + { + "exon_id": 2185928, + "stable_id": "ENSE00003496053", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 21970902, + "loc_end": 21971208, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "4F25E374539A33AA0B4AD698A0011A58DC547920", + "exon_checksum": "15440459F0F3192E523F6122E233671C0D5B5466", + "exon_order": 2 + }, + { + "exon_id": 57506621, + "stable_id": "ENSE00003529527", + "stable_id_version": 2, + "assembly": "GRCh38", + "loc_start": 21967752, + "loc_end": 21968242, + "loc_strand": -1, + "loc_region": "9", + "loc_checksum": "695C89D1D5FD5C182A5AB67FCF461C8915DB0F25", + "exon_checksum": "2C6867673825E460D80853A074DA46A6824434A5", + "exon_order": 3 + } + ], + "mane_transcript": "NM_000077.5", + "mane_transcript_type": "MANE SELECT", + "cds_info": { + "translation_start": 21968229, + "translation_end": 21974827, + "three_prime_utr_start": 21968228, + "three_prime_utr_end": 21967752, + "three_prime_utr_seq": "AAGAACCAGAGAGGCTCTGAGAAACCTCGGGAAACTTAGATCATCAGTCACCGAAGGTCCTACAGGGCCACAACTGCCCCCGCCACAACCCACCCCGCTTTCGTAGTTTTCATTTAGAAAATAGAGCTTTTAAAAATGTCCTGCCTTTTAACGTAGATATATGCCTTCCCCCACTACCGTAAATGTCCATTTATATCATTTTTTATATATTCTTATAAAAATGTAAAAAAGAAAAACACCGCTTCTGCCTTTTCACTGTGTTGGAGTTTTCTGGAGTGAGCACTCACGCCCTAAGCGCACATTCATGTGGGCATTTCTTGCGAGCCTCGCAGCCTCCGGAAGCTGTCGACTTCATGACAAGCATTTTGTGAACTAGGGAAGCTCAGGGGGGTTACTGGCTTCTCTTGAGTCACACTGCTAGCAAATGGCAGAACCAAAGCTCAAATAAAAATAAAATAATTTTCATTCATTCACTCA", + "three_prime_utr_length": 477, + "five_prime_utr_start": 21974857, + "five_prime_utr_end": 21974828, + "five_prime_utr_seq": "GAGAGCAGGCAGCGGGCGGCGGGGAGCAGC", + "five_prime_utr_length": 30, + "loc_region": "9", + "loc_strand": -1, + "cds_seq": "ATGGAGCCGGCGGCGGGGAGCAGCATGGAGCCTTCGGCTGACTGGCTGGCCACGGCCGCGGCCCGGGGTCGGGTAGAGGAGGTGCGGGCGCTGCTGGAGGCGGGGGCGCTGCCCAACGCACCGAATAGTTACGGTCGGAGGCCGATCCAGGTCATGATGATGGGCAGCGCCCGAGTGGCGGAGCTGCTGCTGCTCCACGGCGCGGAGCCCAACTGCGCCGACCCCGCCACTCTCACCCGACCCGTGCACGACGCTGCCCGGGAGGGCTTCCTGGACACGCTGGTGGTGCTGCACCGGGCCGGGGCGCGGCTGGACGTGCGCGATGCCTGGGGCCGTCTGCCCGTGGACCTGGCTGAGGAGCTGGGCCATCGCGATGTCGCACGGTACCTGCGCGCGGCTGCGGGGGGCACCAGAGGCAGTAACCATGCCCGCATAGATGCCGCGGAAGGTCCCTCAGACATCCCCGATTGA" + } + } + ] +} \ No newline at end of file diff --git a/tests/data/ENST00000383925.1.sequence b/tests/data/ENST00000383925.1.sequence new file mode 100644 index 0000000..bbdd75a --- /dev/null +++ b/tests/data/ENST00000383925.1.sequence @@ -0,0 +1 @@ +ATACTTACCTGGCAGGGGAGATACCATGATCACGAAGGTGGTTTTCCCAGGGCGAGGCTTATCCATTGCACTCCGGATGTGCTGACCCCTGCGATTTCCCCAAATGTGGGAAACTCGACTGCATAATTTGTGGTAGTGGGGGACTGCGTTCGCGCTTTCCCCTG \ No newline at end of file diff --git a/tests/data/ENST00000383925.1.tark.model.json b/tests/data/ENST00000383925.1.tark.model.json new file mode 100644 index 0000000..b010a43 --- /dev/null +++ b/tests/data/ENST00000383925.1.tark.model.json @@ -0,0 +1,85 @@ +{ + "annotations": { + "id": "1", + "type": "record", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 16514121 + }, + "end": { + "type": "point", + "position": 16514285 + } + }, + "features": [ + { + "id": "ENSG00000206652", + "type": "gene", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 16514121 + }, + "end": { + "type": "point", + "position": 16514285 + }, + "strand": -1 + }, + "qualifiers": { + "name": "RNU1-1", + "assembly_name": "GRCh38", + "version": "1" + }, + "features": [ + { + "id": "ENST00000383925", + "type": "snRNA", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 16514121 + }, + "end": { + "type": "point", + "position": 16514285 + }, + "strand": -1 + }, + "qualifiers": { + "assembly_name": "GRCh38", + "tag": "basic", + "version": "1" + }, + "features": [ + { + "id": "ENSE00001808303", + "type": "exon", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 16514121 + }, + "end": { + "type": "point", + "position": 16514285 + }, + "strand": -1 + } + } + ] + } + ] + } + ] + }, + "sequence": { + "seq": "ATACTTACCTGGCAGGGGAGATACCATGATCACGAAGGTGGTTTTCCCAGGGCGAGGCTTATCCATTGCACTCCGGATGTGCTGACCCCTGCGATTTCCCCAAATGTGGGAAACTCGACTGCATAATTTGTGGTAGTGGGGGACTGCGTTCGCGCTTTCCCCTG", + "description": "ENST00000383925.1 chromosome:GRCh38:1:16514122:16514285:-1" + } +} \ No newline at end of file diff --git a/tests/data/ENST00000383925.1.tark_raw.json b/tests/data/ENST00000383925.1.tark_raw.json new file mode 100644 index 0000000..12aea08 --- /dev/null +++ b/tests/data/ENST00000383925.1.tark_raw.json @@ -0,0 +1,75 @@ +{ + "count": 1, + "next": null, + "previous": null, + "results": [ + { + "stable_id": "ENST00000383925", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 16514122, + "loc_end": 16514285, + "loc_strand": -1, + "loc_region": "1", + "loc_checksum": "038F34CB0196112C9FEB85595F0EF66CEA231E87", + "exon_set_checksum": "E4F0A73FB88745A75017E2CEF1F5806DA67E559A", + "transcript_checksum": "B29859A7C558F2A19CD3EBAAFE24855E6FEC8299", + "sequence": { + "sequence": "ATACTTACCTGGCAGGGGAGATACCATGATCACGAAGGTGGTTTTCCCAGGGCGAGGCTTATCCATTGCACTCCGGATGTGCTGACCCCTGCGATTTCCCCAAATGTGGGAAACTCGACTGCATAATTTGTGGTAGTGGGGGACTGCGTTCGCGCTTTCCCCTG", + "seq_checksum": "107154F0C6B845AFEDDE5E4343C8B0A770D88439" + }, + "biotype": "snRNA", + "three_prime_utr_start": null, + "three_prime_utr_end": null, + "three_prime_utr_seq": null, + "three_prime_utr_checksum": null, + "five_prime_utr_start": null, + "five_prime_utr_end": null, + "five_prime_utr_seq": null, + "five_prime_utr_checksum": null, + "translations": [], + "genes": [ + { + "stable_id": "ENSG00000206652", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 16514122, + "loc_end": 16514285, + "loc_strand": -1, + "loc_region": "1", + "loc_checksum": "038F34CB0196112C9FEB85595F0EF66CEA231E87", + "name": "RNU1-1", + "gene_checksum": "FB75ECC5191A095BA7BA1D23EB5DDCB3AF32B792" + }, + { + "stable_id": "ENSG00000206652", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 16514122, + "loc_end": 16514285, + "loc_strand": -1, + "loc_region": "1", + "loc_checksum": "038F34CB0196112C9FEB85595F0EF66CEA231E87", + "name": null, + "gene_checksum": "F78320EE4D946C3EADF35CACA4BEECB4CC8501A5" + } + ], + "exons": [ + { + "exon_id": 1712127, + "stable_id": "ENSE00001808303", + "stable_id_version": 1, + "assembly": "GRCh38", + "loc_start": 16514122, + "loc_end": 16514285, + "loc_strand": -1, + "loc_region": "1", + "loc_checksum": "038F34CB0196112C9FEB85595F0EF66CEA231E87", + "exon_checksum": "97B0B82AB8E9C6A87AAFFE8C4B1C5807D580045F", + "exon_order": 1 + } + ], + "cds_info": {} + } + ] +} \ No newline at end of file diff --git a/tests/data/ENST00000383925.sequence b/tests/data/ENST00000383925.sequence new file mode 100644 index 0000000..bbdd75a --- /dev/null +++ b/tests/data/ENST00000383925.sequence @@ -0,0 +1 @@ +ATACTTACCTGGCAGGGGAGATACCATGATCACGAAGGTGGTTTTCCCAGGGCGAGGCTTATCCATTGCACTCCGGATGTGCTGACCCCTGCGATTTCCCCAAATGTGGGAAACTCGACTGCATAATTTGTGGTAGTGGGGGACTGCGTTCGCGCTTTCCCCTG \ No newline at end of file diff --git a/tests/data/ENST00000383925.tark.model.json b/tests/data/ENST00000383925.tark.model.json new file mode 100644 index 0000000..b010a43 --- /dev/null +++ b/tests/data/ENST00000383925.tark.model.json @@ -0,0 +1,85 @@ +{ + "annotations": { + "id": "1", + "type": "record", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 16514121 + }, + "end": { + "type": "point", + "position": 16514285 + } + }, + "features": [ + { + "id": "ENSG00000206652", + "type": "gene", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 16514121 + }, + "end": { + "type": "point", + "position": 16514285 + }, + "strand": -1 + }, + "qualifiers": { + "name": "RNU1-1", + "assembly_name": "GRCh38", + "version": "1" + }, + "features": [ + { + "id": "ENST00000383925", + "type": "snRNA", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 16514121 + }, + "end": { + "type": "point", + "position": 16514285 + }, + "strand": -1 + }, + "qualifiers": { + "assembly_name": "GRCh38", + "tag": "basic", + "version": "1" + }, + "features": [ + { + "id": "ENSE00001808303", + "type": "exon", + "location": { + "type": "range", + "start": { + "type": "point", + "position": 16514121 + }, + "end": { + "type": "point", + "position": 16514285 + }, + "strand": -1 + } + } + ] + } + ] + } + ] + }, + "sequence": { + "seq": "ATACTTACCTGGCAGGGGAGATACCATGATCACGAAGGTGGTTTTCCCAGGGCGAGGCTTATCCATTGCACTCCGGATGTGCTGACCCCTGCGATTTCCCCAAATGTGGGAAACTCGACTGCATAATTTGTGGTAGTGGGGGACTGCGTTCGCGCTTTCCCCTG", + "description": "ENST00000383925.1 chromosome:GRCh38:1:16514122:16514285:-1" + } +} \ No newline at end of file diff --git a/tests/data/ENST00000383925.tark_raw.json b/tests/data/ENST00000383925.tark_raw.json new file mode 100644 index 0000000..abea87f --- /dev/null +++ b/tests/data/ENST00000383925.tark_raw.json @@ -0,0 +1 @@ +{"count": 1, "next": null, "previous": null, "results": [{"stable_id": "ENST00000383925", "stable_id_version": 1, "assembly": "GRCh38", "loc_start": 16514122, "loc_end": 16514285, "loc_strand": -1, "loc_region": "1", "loc_checksum": "038F34CB0196112C9FEB85595F0EF66CEA231E87", "exon_set_checksum": "E4F0A73FB88745A75017E2CEF1F5806DA67E559A", "transcript_checksum": "B29859A7C558F2A19CD3EBAAFE24855E6FEC8299", "sequence": {"sequence": "ATACTTACCTGGCAGGGGAGATACCATGATCACGAAGGTGGTTTTCCCAGGGCGAGGCTTATCCATTGCACTCCGGATGTGCTGACCCCTGCGATTTCCCCAAATGTGGGAAACTCGACTGCATAATTTGTGGTAGTGGGGGACTGCGTTCGCGCTTTCCCCTG", "seq_checksum": "107154F0C6B845AFEDDE5E4343C8B0A770D88439"}, "biotype": "snRNA", "three_prime_utr_start": null, "three_prime_utr_end": null, "three_prime_utr_seq": null, "three_prime_utr_checksum": null, "five_prime_utr_start": null, "five_prime_utr_end": null, "five_prime_utr_seq": null, "five_prime_utr_checksum": null, "translations": [], "genes": [{"stable_id": "ENSG00000206652", "stable_id_version": 1, "assembly": "GRCh38", "loc_start": 16514122, "loc_end": 16514285, "loc_strand": -1, "loc_region": "1", "loc_checksum": "038F34CB0196112C9FEB85595F0EF66CEA231E87", "name": "RNU1-1", "gene_checksum": "FB75ECC5191A095BA7BA1D23EB5DDCB3AF32B792"}, {"stable_id": "ENSG00000206652", "stable_id_version": 1, "assembly": "GRCh38", "loc_start": 16514122, "loc_end": 16514285, "loc_strand": -1, "loc_region": "1", "loc_checksum": "038F34CB0196112C9FEB85595F0EF66CEA231E87", "name": null, "gene_checksum": "F78320EE4D946C3EADF35CACA4BEECB4CC8501A5"}], "exons": [{"exon_id": 1712127, "stable_id": "ENSE00001808303", "stable_id_version": 1, "assembly": "GRCh38", "loc_start": 16514122, "loc_end": 16514285, "loc_strand": -1, "loc_region": "1", "loc_checksum": "038F34CB0196112C9FEB85595F0EF66CEA231E87", "exon_checksum": "97B0B82AB8E9C6A87AAFFE8C4B1C5807D580045F", "exon_order": 1}], "cds_info": {}}]} \ No newline at end of file diff --git a/tests/test_fetch.py b/tests/test_fetch.py index ad07a68..7482f62 100644 --- a/tests/test_fetch.py +++ b/tests/test_fetch.py @@ -1,77 +1,64 @@ -from pathlib import Path - import pytest from mutalyzer_retriever.sources.ensembl import fetch -from mutalyzer_retriever.configuration import settings -API_BASE = settings["ENSEMBL_API"] -API_BASE_GRCH37 = settings["ENSEMBL_API_GRCH37"] +from .commons import _get_content, patch_retriever + -API_BASE_MAP = { - "ENSG00000147889": {"version": 18, "species": "homo_sapiens"}, - "ENSMUSG00000022346": {"version": 18, "species": "mus_musculus"}, -} -API_BASE_GRCH37_MAP = {"ENSG00000147889": {"version": 12, "species": "homo_sapiens"}} +@pytest.mark.parametrize("r_id", [("ENSG00000147889")]) +def test_ensembl_fetch_no_version(r_id): + assert fetch(r_id)[0] == _get_content(f"data/{r_id}.gff3") -@pytest.fixture(autouse=True) -def patch_retriever(monkeypatch): - monkeypatch.setattr("mutalyzer_retriever.sources.ensembl.fetch_gff3", _fetch_gff3) - monkeypatch.setattr( - "mutalyzer_retriever.sources.ensembl._get_reference_information", - _get_reference_information, - ) +@pytest.mark.parametrize("r_id", [("ENSG00000147889.18")]) +def test_ensembl_fetch_version_newest(r_id): + assert fetch(r_id)[0] == _get_content(f"data/{r_id}.gff3") -def _get_content(relative_location): - data_file = Path(__file__).parent.joinpath(relative_location) - with open(str(data_file), "r") as file: - content = file.read() - return content +@pytest.mark.parametrize("r_id", [("ENST00000304494")]) +def test_ensembl_fetch_transcript_no_version(r_id): + assert fetch(r_id)[0] == _get_content(f"data/{r_id}.gff3") -def _fetch_gff3(feature_id, api_base, timeout=1): - if api_base == API_BASE_GRCH37: - return _get_content( - f"data/{feature_id}.{API_BASE_GRCH37_MAP[feature_id]['version']}.gff3" - ) - return _get_content(f"data/{feature_id}.gff3") +@pytest.mark.parametrize("r_id", [("ENST00000304494")]) +def test_ensembl_fetch_transcript_rest_38(r_id): + assert fetch(r_id)[0] == _get_content(f"data/{r_id}.gff3") -def _get_reference_information(reference_id, api_base, timeout=1): - if api_base == API_BASE and reference_id in API_BASE_MAP.keys(): - return API_BASE_MAP[reference_id] - if api_base == API_BASE_GRCH37 and reference_id in API_BASE_GRCH37_MAP.keys(): - return API_BASE_GRCH37_MAP[reference_id] +@pytest.mark.parametrize( + "r_id, r_type, r_source", [("ENST00000304494.5", "json", "ensembl_rest")] +) +def test_ensembl_fetch_transcript_rest_37(r_id, r_type, r_source): + with pytest.raises(ValueError): + fetch(r_id, r_type, r_source) -@pytest.mark.parametrize("reference_id", [("ENSG00000147889")]) -def test_ensembl_fetch_no_version(reference_id): - assert fetch(reference_id)[0] == _get_content(f"data/{reference_id}.gff3") +@pytest.mark.parametrize("r_id, r_type", [("ENST00000304494.7", "json")]) +def test_ensembl_fetch_transcript_tark_38(r_id, r_type): + assert fetch(r_id, r_type)[0] == _get_content(f"data/{r_id}.tark_raw.model.json") -@pytest.mark.parametrize("reference_id", [("ENSG00000147889.18")]) -def test_ensembl_fetch_version_newest(reference_id): - assert fetch(reference_id)[0] == _get_content(f"data/{reference_id}.gff3") +@pytest.mark.parametrize("r_id, r_type", [("ENST00000000000.5", "json")]) +def test_ensembl_fetch_transcript_tark_37(r_id, r_type): + assert fetch(r_id, r_type)[0] == _get_content(f"data/{r_id}.tark_raw.model.json") -@pytest.mark.parametrize("reference_id", [("ENSG00000147889.12")]) -def test_ensembl_fetch_version_grch37(reference_id): - assert fetch(reference_id)[0] == _get_content(f"data/{reference_id}.gff3") +@pytest.mark.parametrize("r_id", [("ENSG00000147889.12")]) +def test_ensembl_fetch_version_grch37(r_id): + assert fetch(r_id)[0] == _get_content(f"data/{r_id}.gff3") -@pytest.mark.parametrize("reference_id", [("ENSG00000147889.15")]) -def test_ensembl_fetch_other_version(reference_id): +@pytest.mark.parametrize("r_id", [("ENSG00000147889.15")]) +def test_ensembl_fetch_other_version(r_id): with pytest.raises(NameError): - fetch(reference_id)[0] + fetch(r_id)[0] is None -@pytest.mark.parametrize("reference_id", [("ENSMUSG00000022346.18")]) -def test_ensembl_fetch_no_version_mouse(reference_id): - assert fetch(reference_id)[0] == _get_content(f"data/{reference_id}.gff3") +@pytest.mark.parametrize("r_id", [("ENSMUSG00000022346.18")]) +def test_ensembl_fetch_no_version_mouse(r_id): + assert fetch(r_id)[0] == _get_content(f"data/{r_id}.gff3") -@pytest.mark.parametrize("reference_id", [("ENSMUSG00000022346")]) -def test_ensembl_fetch_version_newest_mouse(reference_id): - assert fetch(reference_id)[0] == _get_content(f"data/{reference_id}.gff3") +@pytest.mark.parametrize("r_id", [("ENSMUSG00000022346")]) +def test_ensembl_fetch_version_newest_mouse(r_id): + assert fetch(r_id)[0] == _get_content(f"data/{r_id}.gff3") diff --git a/tests/test_model_validation.py b/tests/test_model_validation.py index 8119416..921a126 100644 --- a/tests/test_model_validation.py +++ b/tests/test_model_validation.py @@ -1,3 +1,4 @@ +import json from pathlib import Path import pytest @@ -5,79 +6,56 @@ from mutalyzer_retriever import parser from mutalyzer_retriever.schema_validation import validate +from .commons import patch_retriever, references +from .test_retriever_model import _seq_from_rest + def get_references_content(references): - references_content = [] - for reference_source in references.keys(): - for reference_type in references[reference_source]: - for reference_id in references[reference_source][reference_type]: - path_gb = ( - Path(Path(__file__).parent) - / "data" - / "{}.{}".format(reference_id, reference_type) - ) - with path_gb.open() as f: - reference_content = f.read() - references_content.append( + """Read raw response from tests data folder""" + r_contents = [] + for r_source in references.keys(): + for r_type in references[r_source]: + for r_id in references[r_source][r_type]: + if r_type == "json": + path_gb = ( + Path(Path(__file__).parent) + / "data" + / f"{r_id}.tark_raw.{r_type}" + ) + r_content = json.loads(path_gb.open().read()) + else: + path_gb = Path(Path(__file__).parent) / "data" / f"{r_id}.{r_type}" + with path_gb.open() as f: + r_content = f.read() + r_contents.append( pytest.param( - reference_source, - reference_type, - reference_content, - id="{}-{}-{}".format( - reference_source, reference_type, reference_id - ), + r_source, + r_type, + r_content, + r_id, + id=f"{r_source}-{r_type}-{r_id}", ) ) - return references_content + return r_contents @pytest.mark.parametrize( - "reference_source, reference_type, reference_content", - get_references_content( - { - "ncbi": { - "gff3": [ - "NM_078467.2", - "NM_152263.2", - "NM_152263.3", - "NM_000077.4", - "NM_002001.2", - "NG_012337.1", - "NR_002196.2", - "L41870.1", - "NG_007485.1", - "NC_012920.1", - "NG_009930.1", - "AA010203.1", - "NP_060665.3", - "D64137.1", - "AB006684.1", - "NM_004152.3", - "7", - "M65131.1", - "XR_948219.2", - "NR_023343.1", - ] - }, - "ensembl": { - "gff3": [ - "ENSG00000147889", - "ENST00000383925", - "ENST00000304494", - "ENSG00000198899", - ] - }, - "lrg": {"lrg": ["LRG_11", "LRG_417", "LRG_857"]}, - } - ), + "r_source, r_type, r_content, r_id", get_references_content(references) ) -def test_schema_validation(reference_source, reference_type, reference_content): - reference_model = parser.parse( - reference_content, - reference_type=reference_type, - reference_source=reference_source, +def test_schema_validation( + r_source, r_type, r_content, r_id, monkeypatch: pytest.MonkeyPatch +): + """Parse raw response and check its output schema""" + monkeypatch.setattr( + "mutalyzer_retriever.parsers.json_ensembl._seq_from_rest", + lambda _0, _1, _2, _3, _4: _seq_from_rest(r_id), + ) + r_model = parser.parse( + reference_content=r_content, + reference_type=r_type, + reference_source=r_source, ) - if reference_source == "lrg": - assert validate(reference_model["annotations"]) is None + if r_source in ["ensembl_tark", "lrg"]: + assert validate(r_model["annotations"]) is None else: - assert validate(reference_model) is None + assert validate(r_model) is None diff --git a/tests/test_retriever_model.py b/tests/test_retriever_model.py index ae83733..5381a59 100644 --- a/tests/test_retriever_model.py +++ b/tests/test_retriever_model.py @@ -5,28 +5,7 @@ from mutalyzer_retriever import retrieve_model - -def _get_content(relative_location): - data_file = Path(__file__).parent.joinpath(relative_location) - with open(str(data_file), "r") as file: - content = file.read() - return content - - -def _retrieve_raw( - reference_id, - reference_source=None, - reference_type=None, - size_off=True, - configuration_path=None, - timeout=1, -): - if reference_type == "fasta": - return _get_content("data/" + reference_id + ".fasta"), "fasta", "ncbi" - elif reference_id.startswith("LRG_"): - return _get_content("data/" + reference_id + ".lrg"), "lrg", "lrg" - else: - return _get_content("data/" + reference_id + ".gff3"), "gff3", "ncbi" +from .commons import _get_content, patch_retriever, references def get_tests(references): @@ -36,7 +15,14 @@ def get_tests(references): for r_source in references.keys(): for r_type in references[r_source].keys(): for r_id in references[r_source][r_type]: - p = Path(Path(__file__).parent) / "data" / str(r_id + ".model.json") + if r_type == "json": + p = ( + Path(Path(__file__).parent) + / "data" + / str(r_id + ".tark.model.json") + ) + else: + p = Path(Path(__file__).parent) / "data" / str(r_id + ".model.json") with p.open() as f: r_model = json.loads(f.read()) tests.append( @@ -45,54 +31,23 @@ def get_tests(references): r_source, r_type, r_model, - id="{}-{}-{}".format(r_source, r_type, r_id), + id=f"{r_source}-{r_type}-{r_id}", ) ) return tests +def _seq_from_rest(r_id): + return _get_content("data/" + str(r_id) + ".sequence") + + @pytest.mark.parametrize( - "r_id, r_source, r_type, expected_model", - get_tests( - { - "ncbi": { - "gff3": [ - "NM_078467.2", - "NM_152263.2", - "NM_152263.3", - "NM_000077.4", - "NM_002001.2", - "NG_012337.1", - "NR_002196.2", - "L41870.1", - "NG_007485.1", - "NC_012920.1", - "NG_009930.1", - "AA010203.1", - "NP_060665.3", - "D64137.1", - "AB006684.1", - "NM_004152.3", - "7", - "M65131.1", - "XR_948219.2", - "NR_023343.1", - ] - }, - "ensembl": { - "gff3": [ - "ENSG00000147889", - "ENST00000383925", - "ENST00000304494", - "ENSG00000198899", - ] - }, - "lrg": {"lrg": ["LRG_11", "LRG_417", "LRG_857"]}, - } - ), + "r_id, r_source, r_type, expected_model", get_tests(references) ) -def test_model(r_id, r_source, r_type, expected_model, monkeypatch): - monkeypatch.setattr("mutalyzer_retriever.retriever.retrieve_raw", _retrieve_raw) - - assert retrieve_model(r_id, r_source) == expected_model +def test_model(r_id, r_source, r_type, expected_model, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr( + "mutalyzer_retriever.parsers.json_ensembl._seq_from_rest", + lambda _0, _1, _2, _3, _4: _seq_from_rest(r_id), + ) + assert retrieve_model(r_id, r_source, r_type) == expected_model