diff --git a/gene/etl/merge.py b/gene/etl/merge.py index 00b6e2f5..df360e10 100644 --- a/gene/etl/merge.py +++ b/gene/etl/merge.py @@ -135,7 +135,7 @@ def record_order(record): # merge from constituent records set_fields = ["aliases", "associated_with", "previous_symbols"] scalar_fields = ["symbol", "symbol_status", "label", "strand", - "location_annotations", "locations"] + "location_annotations"] for record in records: for field in set_fields: merged_attrs[field] |= set(record.get(field, set())) @@ -144,6 +144,10 @@ def record_order(record): if field not in merged_attrs and field in record: merged_attrs[field] = record[field] + locations = record.get("locations") + if locations: + merged_attrs[f"{record['src_name'].lower()}_locations"] = locations + gene_type = record.get("gene_type") if gene_type: merged_field = GeneTypeFieldName[record["src_name"].upper()] diff --git a/gene/query.py b/gene/query.py index 6da3a7bc..58b4fb8c 100644 --- a/gene/query.py +++ b/gene/query.py @@ -81,6 +81,20 @@ def fetch_meta(self, src_name: str) -> SourceMeta: except ClientError as e: logger.error(e.response['Error']['Message']) + @staticmethod + def _transform_sequence_location(loc: Dict) -> models.SequenceLocation: + """Transform a sequence location to VRS sequence location + :param Dict loc: Sequence location + :return: VRS sequence location + """ + return models.SequenceLocation( + type="SequenceLocation", + sequence_id=loc["sequence_id"], + interval=models.SequenceInterval( + type="SequenceInterval", + start=models.Number(value=int(loc["start"]), type="Number"), + end=models.Number(value=int(loc["end"]), type="Number"))) + @staticmethod def _transform_chromosome_location(loc: Dict) -> models.ChromosomeLocation: """Transform a chromosome location to VRS chromosome location @@ -98,6 +112,18 @@ def _transform_chromosome_location(loc: Dict) -> models.ChromosomeLocation: end=loc["end"])) return transformed_loc + def _transform_location(self, loc: Dict) -> Dict: + """Transform a 
sequence/chromosome location to VRS sequence/chromosome location + :param Dict loc: Sequence or Chromosome location + :return: VRS sequence or chromosome location represented as a dictionary + """ + if loc["type"] == VRSTypes.SEQUENCE_LOCATION: + loc = self._transform_sequence_location(loc) + else: + loc = self._transform_chromosome_location(loc) + loc._id = ga4gh_identify(loc) + return loc.as_dict() + def _transform_locations(self, record: Dict) -> Dict: """Transform gene locations to VRS Chromosome/Sequence Locations @@ -107,21 +133,7 @@ def _transform_locations(self, record: Dict) -> Dict: record_locations = list() if "locations" in record: for loc in record["locations"]: - if loc["type"] == VRSTypes.SEQUENCE_LOCATION: - transformed_loc = models.SequenceLocation( - type="SequenceLocation", - sequence_id=loc["sequence_id"], - interval=models.SequenceInterval( - type="SequenceInterval", - start=models.Number(value=int(loc["start"]), type="Number"), - end=models.Number(value=int(loc["end"]), type="Number"))) - else: - transformed_loc = self._transform_chromosome_location(loc) - - transformed_loc._id = ga4gh_identify(transformed_loc) - transformed_loc = transformed_loc.as_dict() - record_locations.append(transformed_loc) - + record_locations.append(self._transform_location(loc)) record["locations"] = record_locations return record @@ -437,20 +449,33 @@ def add_gene_descriptor( extension_and_record_labels = [ ("symbol_status", "symbol_status"), ("approved_name", "label"), - ("chromosome_location", "locations"), ("associated_with", "associated_with"), ("previous_symbols", "previous_symbols"), + ("location_annotations", "location_annotations") ] for ext_label, record_label in extension_and_record_labels: if record_label in record and record[record_label]: - if ext_label == 'chromosome_location': - loc = self._transform_chromosome_location(record[record_label][0]) - loc._id = ga4gh_identify(loc) - record[record_label] = loc.as_dict() extensions.append(Extension( 
name=ext_label, value=record[record_label] )) + + record_locations = dict() + if record["item_type"] == "identity": + locs = record.get("locations") + if locs: + record_locations[f"{record['src_name'].lower()}_locations"] = locs + elif record["item_type"] == "merger": + for k, v in record.items(): + if k.endswith("locations") and v: + record_locations[k] = v + + for loc_name, locations in record_locations.items(): + transformed_locs = list() + for loc in locations: + transformed_locs.append(self._transform_location(loc)) + extensions.append(Extension(name=loc_name, value=transformed_locs)) + # handle gene types separately because they're wonky if record["item_type"] == "identity": gene_type = record.get("gene_type") diff --git a/gene/version.py b/gene/version.py index 72bcaab2..fe956cdc 100644 --- a/gene/version.py +++ b/gene/version.py @@ -1,2 +1,2 @@ """Gene normalizer version""" -__version__ = "0.1.29" +__version__ = "0.1.30" diff --git a/tests/unit/data/etl_data/ensembl_108.gff3 b/tests/unit/data/etl_data/ensembl_108.gff3 index 5256b224..4592f4d5 100644 --- a/tests/unit/data/etl_data/ensembl_108.gff3 +++ b/tests/unit/data/etl_data/ensembl_108.gff3 @@ -200,7 +200,7 @@ #!genebuild-last-updated 2021-03 1 GRCh38 chromosome 1 248956422 . . . ID=chromosome:1;Alias=CM000663.2,chr1,NC_000001.11 ### -1 havana pseudogene 11869 14409 . + . ID=gene:ENSG00000223972;Name=DDX11L1;biotype=transcribed_unprocessed_pseudogene;description=DEAD/H-box helicase 11 like 1 (pseudogene) [Source:HGNC Symbol%3BAcc:HGNC:37102];gene_id=ENSG00000223972;logic_name=havana_homo_sapiens;version=5 +1 havana pseudogene 12010 13670 . + . ID=gene:ENSG00000223972;Name=DDX11L1;biotype=transcribed_unprocessed_pseudogene;description=DEAD/H-box helicase 11 like 1 (pseudogene) [Source:HGNC Symbol%3BAcc:HGNC:37102];gene_id=ENSG00000223972;logic_name=havana_homo_sapiens;version=6 ### 1 ensembl_havana gene 220148293 220272453 . - . 
ID=gene:ENSG00000118873;Name=RAB3GAP2;biotype=protein_coding;description=RAB3 GTPase activating non-catalytic protein subunit 2 [Source:HGNC Symbol%3BAcc:HGNC:17168];gene_id=ENSG00000118873;logic_name=ensembl_havana_gene_homo_sapiens;version=16 ### @@ -220,7 +220,7 @@ ### 9 GRCh38 chromosome 1 138394717 . . . ID=chromosome:9;Alias=CM000671.2,chr9,NC_000009.12 ### -9 ensembl_havana gene 130713016 130887675 . + . ID=gene:ENSG00000097007;Name=ABL1;biotype=protein_coding;description=ABL proto-oncogene 1%2C non-receptor tyrosine kinase [Source:HGNC Symbol%3BAcc:HGNC:76];gene_id=ENSG00000097007;logic_name=ensembl_havana_gene_homo_sapiens;version=19 +9 ensembl_havana gene 130713043 130887675 . + . ID=gene:ENSG00000097007;Name=ABL1;biotype=protein_coding;description=ABL proto-oncogene 1%2C non-receptor tyrosine kinase [Source:HGNC Symbol%3BAcc:HGNC:76];gene_id=ENSG00000097007;logic_name=ensembl_havana_gene_homo_sapiens;version=20 ### 11 GRCh38 chromosome 1 135086622 . . . ID=chromosome:11;Alias=CM000673.2,chr11,NC_000011.10 ### @@ -242,7 +242,7 @@ ### X GRCh38 chromosome 1 156040895 . . . ID=chromosome:X;Alias=CM000685.2,chrX,NC_000023.11 ### -X havana ncRNA_gene 154424378 154428512 . - . ID=gene:ENSG00000197180;Name=CH17-340M24.3;biotype=lncRNA;description=uncharacterized protein BC009467 [Source:NCBI gene (formerly Entrezgene)%3BAcc:158960];gene_id=ENSG00000197180;logic_name=havana_homo_sapiens;version=3 +X havana ncRNA_gene 154424378 154428526 . - . ID=gene:ENSG00000197180;Name=ATP6AP1-DT;biotype=lncRNA;description=ATP6AP1 divergent transcript [Source:HGNC Symbol%3BAcc:HGNC:25138];gene_id=ENSG00000197180;logic_name=havana_homo_sapiens;version=4 ### X ensembl_havana gene 155612572 155782459 . + . 
ID=gene:ENSG00000168939;Name=SPRY3;biotype=protein_coding;description=sprouty RTK signaling antagonist 3 [Source:HGNC Symbol%3BAcc:HGNC:11271];gene_id=ENSG00000168939;logic_name=ensembl_havana_gene_homo_sapiens;version=12 ### diff --git a/tests/unit/data/etl_data/hgnc_20210810.json b/tests/unit/data/etl_data/hgnc_20210810.json index bdb65134..821e035c 100644 --- a/tests/unit/data/etl_data/hgnc_20210810.json +++ b/tests/unit/data/etl_data/hgnc_20210810.json @@ -251,17 +251,10 @@ "location_sortable": "22pter-q11" }, { - "date_approved_reserved": "1999-09-29", - "alias_name": [ - "chromatin assembly factor I (150 kDa)" - ], "vega_id": "OTTHUMG00000181922", - "locus_group": "protein-coding gene", - "mane_select": [ - "ENST00000301280.10", - "NM_005483.3" - ], - "status": "Approved", + "ucsc_id": "uc002mal.4", + "name": "chromatin assembly factor 1 subunit A", + "entrez_id": "10036", "alias_symbol": [ "CAF1P150", "CAF1B", @@ -270,47 +263,54 @@ "P150", "MGC71229" ], - "_version_": 1707696198253543425, - "uuid": "cbaac19b-6e86-4b58-9053-e34c3aa5d99e", "prev_name": [ "chromatin assembly factor 1, subunit A (p150)" ], - "refseq_accession": [ - "NM_005483" + "mane_select": [ + "ENST00000301280.10", + "NM_005483.3" ], - "locus_type": "gene with protein product", - "agr": "HGNC:1910", + "locus_group": "protein-coding gene", "hgnc_id": "HGNC:1910", - "rgd_id": [ - "RGD:1590865" + "pubmed_id": [ + 7600578 + ], + "symbol": "CHAF1A", + "locus_type": "gene with protein product", + "status": "Approved", + "mgd_id": [ + "MGI:1351331" + ], + "ccds_id": [ + "CCDS32875" ], "ensembl_gene_id": "ENSG00000167670", - "entrez_id": "10036", + "uuid": "61416b73-dffa-4eb9-9af7-a82d97a84e77", + "date_name_changed": "2015-11-23", "omim_id": [ "601246" ], - "symbol": "CHAF1A", - "date_name_changed": "2015-11-23", - "location": "19p13.3", - "name": "chromatin assembly factor 1 subunit A", - "date_modified": "2019-08-21", - "mgd_id": [ - "MGI:1351331" + "rgd_id": [ + "RGD:1590865" ], - "ucsc_id": 
"uc002mal.4", + "agr": "HGNC:1910", + "location_sortable": "19p13.3", + "date_modified": "2019-08-21", "uniprot_ids": [ "Q13111" ], - "ccds_id": [ - "CCDS32875" + "date_approved_reserved": "1999-09-29", + "refseq_accession": [ + "NM_005483" + ], + "alias_name": [ + "chromatin assembly factor I (150 kDa)" ], "ena": [ "U20979" ], - "pubmed_id": [ - 7600578 - ], - "location_sortable": "19p13.3" + "_version_": 1747674142400839680, + "location": "19p13.3" }, { "date_approved_reserved": "2003-11-13", @@ -459,68 +459,71 @@ "location_sortable": "Xp22.32 and Yp11.3" }, { - "date_approved_reserved": "2005-05-06", - "alias_name": [ - "iGb3 synthase", - "isoglobotriaosylceramide synthase" + "status": "Approved", + "mgd_id": [ + "MGI:2685279" ], - "vega_id": "OTTHUMG00000004125", - "locus_group": "protein-coding gene", + "hgnc_id": "HGNC:30005", + "symbol": "A3GALT2", + "entrez_id": "127550", "mane_select": [ "ENST00000442999.3", "NM_001080438.1" ], - "status": "Approved", - "alias_symbol": [ - "IGBS3S", - "IGB3S" - ], - "_version_": 1707696195380445184, - "uuid": "ec929101-693b-4afc-ae1b-bbe1d38f9c62", "prev_name": [ "alpha 1,3-galactosyltransferase 2, pseudogene" ], - "refseq_accession": [ - "NM_001080438" - ], - "locus_type": "gene with protein product", - "agr": "HGNC:30005", - "hgnc_id": "HGNC:30005", - "rgd_id": [ - "RGD:727913" + "prev_symbol": [ + "A3GALT2P" ], - "ensembl_gene_id": "ENSG00000184389", - "entrez_id": "127550", "gene_group": [ "Glycosyltransferase family 6" ], - "symbol": "A3GALT2", - "date_name_changed": "2013-03-11", - "location": "1p35.1", "name": "alpha 1,3-galactosyltransferase 2", - "date_modified": "2018-02-08", - "mgd_id": [ - "MGI:2685279" - ], - "ucsc_id": "uc031plq.1", - "prev_symbol": [ - "A3GALT2P" + "alias_name": [ + "iGb3 synthase", + "isoglobotriaosylceramide synthase" ], + "_version_": 1747674139854897153, "uniprot_ids": [ "U3KPV4" ], + "rgd_id": [ + "RGD:727913" + ], + "agr": "HGNC:30005", + "date_modified": "2018-02-08", + 
"ensembl_gene_id": "ENSG00000184389", + "date_name_changed": "2013-03-11", + "locus_type": "gene with protein product", "ccds_id": [ "CCDS60080" ], - "gene_group_id": [ - 429 - ], - "date_symbol_changed": "2013-03-11", "pubmed_id": [ 10854427, 18630988 ], - "location_sortable": "01p35.1" + "alias_symbol": [ + "IGBS3S", + "IGB3S" + ], + "date_symbol_changed": "2013-03-11", + "locus_group": "protein-coding gene", + "vega_id": "OTTHUMG00000004125", + "ucsc_id": "uc031plq.1", + "gene_group_id": [ + 429 + ], + "location": "1p35.1", + "date_approved_reserved": "2005-05-06", + "refseq_accession": [ + "NM_001080438" + ], + "omim_id": [ + "619850" + ], + "location_sortable": "01p35.1", + "uuid": "2192efcd-2c34-43b1-aca0-2fc9f72ced47" }, { "date_approved_reserved": "2009-02-18", @@ -1169,8 +1172,33 @@ ], "entrez_id": "109280162", "location_sortable": "10q23.3 or 10q24.2" - } + }, + { + "name": "interferon production regulator", + "locus_group": "other", + "entrez_id": "3466", + "symbol": "IFNR", + "pubmed_id": [ + 1906174, + 1193239 + ], + "hgnc_id": "HGNC:5447", + "status": "Approved", + "locus_type": "unknown", + "uuid": "58487ff1-1a71-435a-89b2-02f7e275b3af", + "location_sortable": "16", + "date_modified": "2019-06-26", + "omim_id": [ + "147573" + ], + "curator_notes": [ + "This gene has the locus type unknown because it has never been mapped to the human genome." + ], + "date_approved_reserved": "1986-01-01", + "location": "16", + "_version_": 1747674151184760833 + } ], "start": 0 } -} +} \ No newline at end of file diff --git a/tests/unit/data/etl_data/ncbi_GRCh38.p14.gff b/tests/unit/data/etl_data/ncbi_GRCh38.p14.gff index cad9dcee..d06ed874 100644 --- a/tests/unit/data/etl_data/ncbi_GRCh38.p14.gff +++ b/tests/unit/data/etl_data/ncbi_GRCh38.p14.gff @@ -49,7 +49,7 @@ NC_000011.10 Curated Genomic exon 117138227 117138867 . 
##sequence-region NC_000012.12 1 133275309 ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606 NC_000012.12 RefSeq region 1 133275309 . + . ID=NC_000012.12:1..133275309;Dbxref=taxon:9606;Name=12;chromosome=12;gbkey=Src;genome=chromosome;mol_type=genomic DNA -NC_000012.12 BestRefSeq%2CGnomon gene 133037301 133063299 . + . ID=gene-ZNF84;Dbxref=GeneID:7637,HGNC:HGNC:13159,MIM:618554;Name=ZNF84;description=zinc finger protein 84;gbkey=Gene;gene=ZNF84;gene_biotype=protein_coding;gene_synonym=HPF2 +NC_000012.12 BestRefSeq%2CGnomon gene 133037509 133063299 . + . ID=gene-ZNF84;Dbxref=GeneID:7637,HGNC:HGNC:13159,MIM:618554;Name=ZNF84;description=zinc finger protein 84;gbkey=Gene;gene=ZNF84;gene_biotype=protein_coding;gene_synonym=HPF2 ##sequence-region NC_000015.10 1 101991189 ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606 NC_000015.10 RefSeq region 1 101991189 . + . ID=NC_000015.10:1..101991189;Dbxref=taxon:9606;Name=15;chromosome=15;gbkey=Src;genome=chromosome;mol_type=genomic DNA @@ -58,7 +58,7 @@ NC_000015.10 BestRefSeq gene 89784895 89814852 . - . ID=gene-ANPEP;Dbxref=GeneID ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606 NC_000019.10 RefSeq region 1 58617616 . + . ID=NC_000019.10:1..58617616;Dbxref=taxon:9606;Name=19;chromosome=19;gbkey=Src;genome=chromosome;mol_type=genomic DNA NC_000019.10 BestRefSeq%2CGnomon gene 38211006 38229695 . - . ID=gene-DPF1;Dbxref=GeneID:8193,HGNC:HGNC:20225,MIM:601670;Name=DPF1;description=double PHD fingers 1;gbkey=Gene;gene=DPF1;gene_biotype=protein_coding;gene_synonym=BAF45b,NEUD4,neuro-d4 -NC_000019.10 BestRefSeq%2CGnomon gene 4402596 4448322 . + . ID=gene-CHAF1A;Dbxref=GeneID:10036,HGNC:HGNC:1910,MIM:601246;Name=CHAF1A;description=chromatin assembly factor 1 subunit A;gbkey=Gene;gene=CHAF1A;gene_biotype=protein_coding;gene_synonym=CAF-1,CAF1,CAF1B,CAF1P150,P150 +NC_000019.10 BestRefSeq%2CGnomon gene 4402640 4450830 . + . 
ID=gene-CHAF1A;Dbxref=GeneID:10036,HGNC:HGNC:1910,MIM:601246;Name=CHAF1A;description=chromatin assembly factor 1 subunit A;gbkey=Gene;gene=CHAF1A;gene_biotype=protein_coding;gene_synonym=CAF-1,CAF1,CAF1B,CAF1P150,P150 ##sequence-region NT_187390.1 1 42811 ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606 NT_187390.1 RefSeq region 1 42811 . + . ID=NT_187390.1:1..42811;Dbxref=taxon:9606;Name=22;chromosome=22;gbkey=Src;genome=genomic;map=unlocalized;mol_type=genomic DNA @@ -83,7 +83,7 @@ NC_000023.11 BestRefSeq gene 155612586 155782459 . + . ID=gene-SPRY3;Dbxref=Gene ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606 NC_000024.10 RefSeq region 1 57227415 . + . ID=NC_000024.10:1..57227415;Dbxref=taxon:9606;Name=Y;chromosome=Y;gbkey=Src;genome=chromosome;mol_type=genomic DNA NC_000024.10 BestRefSeq gene 1386152 1392113 . - . ID=gene-SLC25A6-2;Dbxref=GeneID:293,HGNC:HGNC:10992,MIM:403000;Name=SLC25A6;description=solute carrier family 25 member 6;gbkey=Gene;gene=SLC25A6;gene_biotype=protein_coding;gene_synonym=AAC3,ANT,ANT 2,ANT 3,ANT3,ANT3Y -NC_000024.10 BestRefSeq gene 56923423 56968979 . + . ID=gene-SPRY3-2;Dbxref=GeneID:10251,HGNC:HGNC:11271,MIM:300531;Name=SPRY3;description=sprouty RTK signaling antagonist 3;gbkey=Gene;gene=SPRY3;gene_biotype=protein_coding;gene_synonym=spry-3;partial=true;start_range=.,56923423 +NC_000023.11 BestRefSeq gene 155612586 155782459 . + . ID=gene-SPRY3;Dbxref=GeneID:10251,HGNC:HGNC:11271,MIM:300531;Name=SPRY3;description=sprouty RTK signaling antagonist 3;gbkey=Gene;gene=SPRY3;gene_biotype=protein_coding;gene_synonym=spry-3 ##sequence-region NT_167246.2 1 4677643 ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606 NT_167246.2 RefSeq region 1 4677643 . + . 
ID=NT_167246.2:1..4677643;Dbxref=taxon:9606;Name=6;chromosome=6;gbkey=Src;genome=genomic;map=6p22.1-21.32;mol_type=genomic DNA diff --git a/tests/unit/data/etl_data/ncbi_history_20210813.tsv b/tests/unit/data/etl_data/ncbi_history_20210813.tsv index 06b23b48..76026be3 100644 --- a/tests/unit/data/etl_data/ncbi_history_20210813.tsv +++ b/tests/unit/data/etl_data/ncbi_history_20210813.tsv @@ -16,3 +16,4 @@ 9606 7637 100287429 LOC100287429 20110803 9606 - 103344718 HOTS 20200620 9606 - 544580 AASTH23 20190503 +9606 10036 107985297 LOC107985297 20220408 diff --git a/tests/unit/data/etl_data/ncbi_info_20210813.tsv b/tests/unit/data/etl_data/ncbi_info_20210813.tsv index 46735b88..ef963c13 100644 --- a/tests/unit/data/etl_data/ncbi_info_20210813.tsv +++ b/tests/unit/data/etl_data/ncbi_info_20210813.tsv @@ -5,7 +5,7 @@ 9606 170 AFA - - MIM:106250 - - ankyloblepharon filiforme adnatum unknown - - - - 20191002 - 9606 290 ANPEP - APN|CD13|GP150|LAP1|P150|PEPN MIM:151530|HGNC:HGNC:500|Ensembl:ENSG00000166825 15 15q26.1 alanyl aminopeptidase, membrane protein-coding ANPEP alanyl aminopeptidase, membrane O aminopeptidase N|AP-M|AP-N|alanyl (membrane) aminopeptidase|aminopeptidase M|hAPN|membrane alanyl aminopeptidase|microsomal aminopeptidase|myeloid plasma membrane glycoprotein CD13 20210708 - 9606 673 BRAF - B-RAF1|B-raf|BRAF1|NS7|RAFB1 MIM:164757|HGNC:HGNC:1097|Ensembl:ENSG00000157764 7 7q34 B-Raf proto-oncogene, serine/threonine kinase protein-coding BRAF B-Raf proto-oncogene, serine/threonine kinase O serine/threonine-protein kinase B-raf|94 kDa B-raf protein|B-Raf proto-oncogene serine/threonine-protein kinase (p94)|B-Raf serine/threonine-protein|murine sarcoma viral (v-raf) oncogene homolog B1|proto-oncogene B-Raf|v-raf murine sarcoma viral oncogene homolog B|v-raf murine sarcoma viral oncogene homolog B1 20210809 - -9606 10036 CHAF1A - CAF-1|CAF1|CAF1B|CAF1P150|P150 MIM:601246|HGNC:HGNC:1910|Ensembl:ENSG00000167670 19 19p13.3 chromatin assembly factor 1 subunit A 
protein-coding CHAF1A chromatin assembly factor 1 subunit A O chromatin assembly factor 1 subunit A|CAF-1 subunit A|CAF-I 150 kDa subunit|CAF-I p150|CTB-50L17.7|chromatin assembly factor I (150 kDa)|chromatin assembly factor I p150 subunit|hp150 20210808 - +9606 10036 CHAF1A - CAF-1|CAF1|CAF1B|CAF1P150|P150 MIM:601246|HGNC:HGNC:1910|Ensembl:ENSG00000167670|AllianceGenome:HGNC:1910 19 19p13.3 chromatin assembly factor 1 subunit A protein-coding CHAF1A chromatin assembly factor 1 subunit A O chromatin assembly factor 1 subunit A|CAF-1 subunit A|CAF-I 150 kDa subunit|CAF-I p150|CTB-50L17.7|chromatin assembly factor I (150 kDa)|chromatin assembly factor I p150 subunit|hp150 20221023 - 9606 9646 CTR9 - SH2BP1|TSBP|p150|p150TSP MIM:609366|HGNC:HGNC:16850|Ensembl:ENSG00000198730 11 11p15.4 CTR9 homolog, Paf1/RNA polymerase II complex component protein-coding CTR9 CTR9 homolog, Paf1/RNA polymerase II complex component O RNA polymerase-associated protein CTR9 homolog|Ctr9, Paf1/RNA polymerase II complex component, homolog|SH2 domain binding protein 1 (tetratricopeptide repeat containing)|TPR-containing, SH2-binding phosphoprotein 20210708 - 9606 8193 DPF1 - BAF45b|NEUD4|SMARCG1|neuro-d4 MIM:601670|HGNC:HGNC:20225|Ensembl:ENSG00000011332 19 19q13.2 double PHD fingers 1 protein-coding DPF1 double PHD fingers 1 O zinc finger protein neuro-d4|BRG1-associated factor 45B|D4, zinc and double PHD fingers family 1|neuro-d4 homolog 20210726 - 9606 2722 GLC1B - - MIM:606689 2 2cen-q13 glaucoma 1, open angle, B (adult-onset) unknown - - - - 20191002 - @@ -19,7 +19,8 @@ 9606 25782 RAB3GAP2 - MARTS1|RAB3-GAP150|RAB3GAP150|SPG69|WARBM2|p150 MIM:609275|HGNC:HGNC:17168|Ensembl:ENSG00000118873 1 1q41 RAB3 GTPase activating non-catalytic protein subunit 2 protein-coding RAB3GAP2 RAB3 GTPase activating non-catalytic protein subunit 2 O rab3 GTPase-activating protein non-catalytic subunit|RAB3 GTPase activating protein subunit 2 (non-catalytic)|RGAP-iso|rab3 GTPase-activating protein 150 kDa 
subunit|rab3-GAP p150|rab3-GAP regulatory subunit 20210709 - 9606 293 SLC25A6 - AAC3|ANT|ANT 2|ANT 3|ANT3|ANT3Y MIM:300151|MIM:403000|HGNC:HGNC:10992|Ensembl:ENSG00000169100 X|Y X;Y solute carrier family 25 member 6 protein-coding SLC25A6 solute carrier family 25 member 6 O ADP/ATP translocase 3|ADP,ATP carrier protein 3|ADP,ATP carrier protein, liver|ADP/ATP translocator of liver|adenine nucleotide translocator 3|epididymis secretory sperm binding protein|solute carrier family 25 (mitochondrial carrier; adenine nucleotide translocator), member 6 20210708 - 9606 100049159 SPG37 - - MIM:611945 8 8p21.2-q13.3 spastic paraplegia 37 (autosomal dominant) unknown - - - - 20191002 - -9606 10251 SPRY3 - spry-3 MIM:300531|HGNC:HGNC:11271|Ensembl:ENSG00000168939 X|Y Xq28 and Yq12 sprouty RTK signaling antagonist 3 protein-coding SPRY3 sprouty RTK signaling antagonist 3 O protein sprouty homolog 3|antagonist of FGF signaling|sprouty homolog 3|sprouty3 20210807 - -9606 7637 ZNF84 - HPF2 MIM:618554|HGNC:HGNC:13159|Ensembl:ENSG00000198040 12 12q24.33|map from Rosati ref via FISH [AFS] zinc finger protein 84 protein-coding ZNF84 zinc finger protein 84 O zinc finger protein 84|zinc finger protein HPF2 20210611 - +9606 10251 SPRY3 - spry-3 MIM:300531|HGNC:HGNC:11271|Ensembl:ENSG00000168939|AllianceGenome:HGNC:11271 X|Y Xq28 and Yq12 sprouty RTK signaling antagonist 3 protein-coding SPRY3 sprouty RTK signaling antagonist 3 O protein sprouty homolog 3|antagonist of FGF signaling|sprouty homolog 3|sprouty3 20220805 - +9606 7637 ZNF84 - HPF2 MIM:618554|HGNC:HGNC:13159|Ensembl:ENSG00000198040|AllianceGenome:HGNC:13159 12 12q24.33|map from Rosati ref via FISH [AFS] zinc finger protein 84 protein-coding ZNF84 zinc finger protein 84 O zinc finger protein 84|zinc finger protein HPF2 20220906 - 9606 619538 OMS - COME/ROM MIM:166760 10|19|3 10q26.3;19q13.42-q13.43;3p25.3 otitis media, susceptibility to unknown - - - chronic/recurrent otitis media 20170408 - 9606 653303 LOC653303 - - - 11 
11q23.3 proprotein convertase subtilisin/kexin type 7 pseudogene pseudo - - - - 20211123 - +9606 3466 IFNR - IFNGM|IFNGM2 MIM:147573|HGNC:HGNC:5447 16 - interferon production regulator unknown IFNR interferon production regulator O - 20190324 - diff --git a/tests/unit/test_ensembl_source.py b/tests/unit/test_ensembl_source.py index aca97885..d6e78721 100644 --- a/tests/unit/test_ensembl_source.py +++ b/tests/unit/test_ensembl_source.py @@ -35,10 +35,10 @@ def ddx11l1(): "location_annotations": [], "locations": [ { - "_id": "ga4gh:VSL.naD2_Q0JKCEKkGj8FvMzerePKnNNcF5N", + "_id": "ga4gh:VSL.dJvg-3cfkRkZEOuAmJ9xbkTBrTCea2VP", "interval": { - "end": {"value": 14409, "type": "Number"}, - "start": {"value": 11868, "type": "Number"}, + "end": {"value": 13670, "type": "Number"}, + "start": {"value": 12009, "type": "Number"}, "type": "SequenceInterval" }, "sequence_id": "ga4gh:SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO", @@ -85,23 +85,23 @@ def tp53(): @pytest.fixture(scope="module") -def CH17_340M24_3(): - """Create a CH17-340M24.3 test fixture.""" +def ATP6AP1_DT(): + """Create a ATP6AP1-DT test fixture.""" params = { "match_type": MatchType.NO_MATCH, "concept_id": "ensembl:ENSG00000197180", - "symbol": "CH17-340M24.3", - "label": "uncharacterized protein BC009467", + "symbol": "ATP6AP1-DT", + "label": "ATP6AP1 divergent transcript", "previous_symbols": [], "aliases": [], - "xrefs": ["ncbigene:158960"], + "xrefs": ["hgnc:25138"], "symbol_status": None, "location_annotations": [], "locations": [ { - "_id": "ga4gh:VSL.Qgt1dnZLg46y-lkbsk2lCnlfose0VsFt", + "_id": "ga4gh:VSL.GPNFtIoo1cr6b_wQSk7xP2wzncSyXgSx", "interval": { - "end": {"value": 154428512, "type": "Number"}, + "end": {"value": 154428526, "type": "Number"}, "start": {"value": 154424377, "type": "Number"}, "type": "SequenceInterval" }, @@ -220,21 +220,21 @@ def test_tp53(ensembl, tp53): check_resp_single_record(resp, tp53, MatchType.SYMBOL) -def test_CH17_340M24_3(ensembl, CH17_340M24_3): - """Test that CH17-340M24.3 
normalizes to correct gene concept.""" +def test_ATP6AP1_DT(ensembl, ATP6AP1_DT): + """Test that ATP6AP1-DT normalizes to correct gene concept.""" # Concept ID resp = ensembl.search("ensembl:ENSG00000197180") - check_resp_single_record(resp, CH17_340M24_3, MatchType.CONCEPT_ID) + check_resp_single_record(resp, ATP6AP1_DT, MatchType.CONCEPT_ID) resp = ensembl.search("ENSEMBL:ENSG00000197180") - check_resp_single_record(resp, CH17_340M24_3, MatchType.CONCEPT_ID) + check_resp_single_record(resp, ATP6AP1_DT, MatchType.CONCEPT_ID) resp = ensembl.search("ENSG00000197180") - check_resp_single_record(resp, CH17_340M24_3, MatchType.CONCEPT_ID) + check_resp_single_record(resp, ATP6AP1_DT, MatchType.CONCEPT_ID) # Symbol - resp = ensembl.search("CH17-340M24.3") - check_resp_single_record(resp, CH17_340M24_3, MatchType.SYMBOL) + resp = ensembl.search("ATP6AP1-DT") + check_resp_single_record(resp, ATP6AP1_DT, MatchType.SYMBOL) def test_hsa_mir_1253(ensembl, hsa_mir_1253): diff --git a/tests/unit/test_hgnc_source.py b/tests/unit/test_hgnc_source.py index 2408e0e3..811e8f62 100644 --- a/tests/unit/test_hgnc_source.py +++ b/tests/unit/test_hgnc_source.py @@ -175,6 +175,7 @@ def a3galt2(): "ucsc:uc031plq.1", "uniprot:U3KPV4", "ccds:CCDS60080", + "omim:619850", "pubmed:10854427", "pubmed:18630988", "refseq:NM_001080438" diff --git a/tests/unit/test_ncbi_source.py b/tests/unit/test_ncbi_source.py index 5f403401..ee49225c 100644 --- a/tests/unit/test_ncbi_source.py +++ b/tests/unit/test_ncbi_source.py @@ -202,10 +202,10 @@ def spry3(): "type": "SequenceLocation" }, { - "_id": "ga4gh:VSL.Cr_HtUTpUe6KB37Y7zOTDbx9JglIzE1O", + "_id": "ga4gh:VSL.SvfaYotp4SHyoacjNXRqdIE1AZPCx_SI", "interval": { "end": {"value": 56968979, "type": "Number"}, - "start": {"value": 56923422, "type": "Number"}, + "start": {"value": 56954315, "type": "Number"}, "type": "SequenceInterval" }, "sequence_id": "ga4gh:SQ.8_liLu1aycC0tPQPFmUaGXJLDs5SbPZ5", @@ -290,10 +290,10 @@ def znf84(): "type": "ChromosomeLocation" }, 
{ - "_id": "ga4gh:VSL.w5FE3al-0SUkARxk_RdCD5ypYIh_WtSM", + "_id": "ga4gh:VSL.pT54XeMRdsoRFulVQU4wxmTcLR0jHbuu", "interval": { "end": {"value": 133063299, "type": "Number"}, - "start": {"value": 133037300, "type": "Number"}, + "start": {"value": 133037508, "type": "Number"}, "type": "SequenceInterval" }, "sequence_id": "ga4gh:SQ.6wlJpONE3oNb4D69ULmEXhqyDZ4vwNfl", diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 88094094..9992f135 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -83,20 +83,63 @@ def normalized_ache(): "type": "Extension" }, { - "name": "chromosome_location", - "value": { - "_id": "ga4gh:VCL.VtdU_0lYXL_o95lXRUfhv-NDJVVpmKoD", - "type": "ChromosomeLocation", - "species_id": "taxonomy:9606", - "chr": "7", - "interval": { - "end": "q22.1", - "start": "q22.1", - "type": "CytobandInterval" + "name": "hgnc_locations", + "value": [ + { + "_id": "ga4gh:VCL.VtdU_0lYXL_o95lXRUfhv-NDJVVpmKoD", + "type": "ChromosomeLocation", + "species_id": "taxonomy:9606", + "chr": "7", + "interval": { + "end": "q22.1", + "start": "q22.1", + "type": "CytobandInterval" + } } - }, + ], "type": "Extension" }, + { + "name": "ensembl_locations", + "value": [ + { + "_id": "ga4gh:VSL.AF6wPZclBqTauGr3yx_CqmMndLKhq0Cm", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + "interval": { + "type": "SequenceInterval", + "start": {"type": "Number", "value": 100889993}, + "end": {"type": "Number", "value": 100896974} + } + } + ] + }, + { + "name": "ncbi_locations", + "value": [ + { + "_id": "ga4gh:VCL.VtdU_0lYXL_o95lXRUfhv-NDJVVpmKoD", + "type": "ChromosomeLocation", + "species_id": "taxonomy:9606", + "chr": "7", + "interval": { + "end": "q22.1", + "start": "q22.1", + "type": "CytobandInterval" + } + }, + { + "_id": "ga4gh:VSL.EepkXho2doYcUT1DW54fT1a00_zkqrn0", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + "interval": { + "type": "SequenceInterval", + "start": 
{"type": "Number", "value": 100889993}, + "end": {"type": "Number", "value": 100896994} + } + } + ] + }, { "name": "ncbi_gene_type", "type": "Extension", @@ -168,20 +211,63 @@ def normalized_braf(): "type": "Extension" }, { - "name": "chromosome_location", - "value": { - "_id": "ga4gh:VCL.O6yCQ1cnThOrTfK9YUgMlTfM6HTqbrKw", - "type": "ChromosomeLocation", - "species_id": "taxonomy:9606", - "chr": "7", - "interval": { - "end": "q34", - "start": "q34", - "type": "CytobandInterval" + "name": "hgnc_locations", + "value": [ + { + "_id": "ga4gh:VCL.O6yCQ1cnThOrTfK9YUgMlTfM6HTqbrKw", + "type": "ChromosomeLocation", + "species_id": "taxonomy:9606", + "chr": "7", + "interval": { + "end": "q34", + "start": "q34", + "type": "CytobandInterval" + } } - }, + ], "type": "Extension" }, + { + "name": "ensembl_locations", + "value": [ + { + "_id": "ga4gh:VSL.amNWL6i7F2nbSZAf2QLTRTujxuDrd0pR", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + "interval": { + "type": "SequenceInterval", + "start": {"type": "Number", "value": 140719326}, + "end": {"type": "Number", "value": 140924929} + } + } + ] + }, + { + "name": "ncbi_locations", + "value": [ + { + "_id": "ga4gh:VCL.O6yCQ1cnThOrTfK9YUgMlTfM6HTqbrKw", + "type": "ChromosomeLocation", + "species_id": "taxonomy:9606", + "chr": "7", + "interval": { + "end": "q34", + "start": "q34", + "type": "CytobandInterval" + } + }, + { + "_id": "ga4gh:VSL.xZU3kL8F6t2ca6WH_26CWKfNW9-owhR4", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul", + "interval": { + "type": "SequenceInterval", + "start": {"type": "Number", "value": 140713327}, + "end": {"type": "Number", "value": 140924929} + } + } + ] + }, { "name": "ncbi_gene_type", "type": "Extension", @@ -267,20 +353,63 @@ def normalized_abl1(): "type": "Extension" }, { - "name": "chromosome_location", - "value": { - "_id": "ga4gh:VCL.WvMfE67KxSDAV8JaK593TI74yyJWIsMQ", - "type": "ChromosomeLocation", - "species_id": 
"taxonomy:9606", - "chr": "9", - "interval": { - "end": "q34.12", - "start": "q34.12", - "type": "CytobandInterval" + "name": "hgnc_locations", + "value": [ + { + "_id": "ga4gh:VCL.WvMfE67KxSDAV8JaK593TI74yyJWIsMQ", + "type": "ChromosomeLocation", + "species_id": "taxonomy:9606", + "chr": "9", + "interval": { + "end": "q34.12", + "start": "q34.12", + "type": "CytobandInterval" + } } - }, + ], "type": "Extension" }, + { + "name": "ensembl_locations", + "value": [ + { + "_id": "ga4gh:VSL.4Zvsf29k0Kp3GLIQBLSakWuOF9zyy61c", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.KEO-4XBcm1cxeo_DIQ8_ofqGUkp4iZhI", + "interval": { + "type": "SequenceInterval", + "start": {"type": "Number", "value": 130713042}, + "end": {"type": "Number", "value": 130887675} + } + } + ] + }, + { + "name": "ncbi_locations", + "value": [ + { + "_id": "ga4gh:VCL.WvMfE67KxSDAV8JaK593TI74yyJWIsMQ", + "type": "ChromosomeLocation", + "species_id": "taxonomy:9606", + "chr": "9", + "interval": { + "end": "q34.12", + "start": "q34.12", + "type": "CytobandInterval" + } + }, + { + "_id": "ga4gh:VSL.4Zvsf29k0Kp3GLIQBLSakWuOF9zyy61c", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.KEO-4XBcm1cxeo_DIQ8_ofqGUkp4iZhI", + "interval": { + "type": "SequenceInterval", + "start": {"type": "Number", "value": 130713042}, + "end": {"type": "Number", "value": 130887675} + } + } + ] + }, { "name": "ncbi_gene_type", "type": "Extension", @@ -319,7 +448,8 @@ def normalized_p150(): "CAF-1", "P150", "CAF1B", - "CAF1" + "CAF1", + "LOC107985297" ], "extensions": [ { @@ -347,20 +477,63 @@ def normalized_p150(): "type": "Extension" }, { - "name": "chromosome_location", - "value": { - "_id": "ga4gh:VCL.yF2TzeunqY92v3yhDsCR_t5X997mWriF", - "type": "ChromosomeLocation", - "species_id": "taxonomy:9606", - "chr": "19", - "interval": { - "end": "p13.3", - "start": "p13.3", - "type": "CytobandInterval" + "name": "hgnc_locations", + "value": [ + { + "_id": "ga4gh:VCL.yF2TzeunqY92v3yhDsCR_t5X997mWriF", + "type": 
"ChromosomeLocation", + "species_id": "taxonomy:9606", + "chr": "19", + "interval": { + "end": "p13.3", + "start": "p13.3", + "type": "CytobandInterval" + } } - }, + ], "type": "Extension" }, + { + "name": "ensembl_locations", + "value": [ + { + "_id": "ga4gh:VSL.VVxEanUPWWMy_IChkj_kPIpRnYAatqrq", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl", + "interval": { + "type": "SequenceInterval", + "start": {"type": "Number", "value": 4402639}, + "end": {"type": "Number", "value": 4445018} + } + } + ] + }, + { + "name": "ncbi_locations", + "value": [ + { + "_id": "ga4gh:VCL.yF2TzeunqY92v3yhDsCR_t5X997mWriF", + "type": "ChromosomeLocation", + "species_id": "taxonomy:9606", + "chr": "19", + "interval": { + "end": "p13.3", + "start": "p13.3", + "type": "CytobandInterval" + } + }, + { + "_id": "ga4gh:VSL.w4vjQaVy5L6oCfsQrNfOpeHjV4_EimQ1", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl", + "interval": { + "type": "SequenceInterval", + "start": {"type": "Number", "value": 4402639}, + "end": {"type": "Number", "value": 4450830} + } + } + ] + }, { "name": "ncbi_gene_type", "type": "Extension", @@ -375,6 +548,10 @@ def normalized_p150(): "name": "ensembl_biotype", "type": "Extension", "value": "protein_coding" + }, + { + "name": "previous_symbols", + "value": ["LOC107985297"] } ] } @@ -403,18 +580,30 @@ def normalized_loc_653303(): }, { "type": "Extension", - "name": "chromosome_location", - "value": { - "_id": "ga4gh:VCL.WzURLvTklFI7K2GAP8gIw6vgWDWXMXuW", - "type": "ChromosomeLocation", - "species_id": "taxonomy:9606", - "chr": "11", - "interval": { - "type": "CytobandInterval", - "start": "q23.3", - "end": "q23.3" + "name": "ncbi_locations", + "value": [ + { + "_id": "ga4gh:VCL.WzURLvTklFI7K2GAP8gIw6vgWDWXMXuW", + "type": "ChromosomeLocation", + "species_id": "taxonomy:9606", + "chr": "11", + "interval": { + "type": "CytobandInterval", + "start": "q23.3", + "end": "q23.3" + } + }, + { + "_id": 
"ga4gh:VSL.dhj3ZilmW0bmmUjUvrG7zCWwsPn-7XyB", + "type": "SequenceLocation", + "sequence_id": "ga4gh:SQ.2NkFm8HK88MqeNkCgj78KidCAXgnsfV1", + "interval": { + "type": "SequenceInterval", + "start": {"type": "Number", "value": 117135528}, + "end": {"type": "Number", "value": 117138867} + } } - } + ] }, { "type": "Extension", @@ -612,18 +801,18 @@ def normalize_unmerged_chaf1a(): } }, { - "_id": "ga4gh:VSL.X4HEwp9RgFN5WpmJM4bWpcOcN9qHX-hj", + "_id": "ga4gh:VSL.w4vjQaVy5L6oCfsQrNfOpeHjV4_EimQ1", "type": "SequenceLocation", "sequence_id": "ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl", # noqa: E501 "interval": { "type": "SequenceInterval", "start": { "type": "Number", - "value": 4402595 + "value": 4402639 }, "end": { "type": "Number", - "value": 4448322 + "value": 4450830 } } } @@ -635,7 +824,7 @@ def normalize_unmerged_chaf1a(): "CAF1B", "CAF-1" ], - "previous_symbols": [], + "previous_symbols": ["LOC107985297"], "xrefs": [ "ensembl:ENSG00000167670", "hgnc:1910" @@ -803,6 +992,61 @@ def normalize_unmerged_ache(): } +@pytest.fixture(scope="module") +def normalized_ifnr(): + """Return normalized Gene Descriptor for IFNR.""" + params = { + "id": "normalize.gene:IFNR", + "type": "GeneDescriptor", + "gene_id": "hgnc:5447", + "label": "IFNR", + "xrefs": { + "ncbigene:3466" + }, + "alternate_labels": [ + "IFNGM", + "IFNGM2" + ], + "extensions": [ + { + "name": "approved_name", + "value": "interferon production regulator", + "type": "Extension" + }, + { + "name": "symbol_status", + "value": "approved", + "type": "Extension" + }, + { + "name": "associated_with", + "value": [ + "pubmed:1906174", + "omim:147573", + "pubmed:1193239" + ], + "type": "Extension" + }, + { + "name": "ncbi_gene_type", + "type": "Extension", + "value": "unknown" + }, + { + "name": "hgnc_locus_type", + "type": "Extension", + "value": "unknown" + }, + { + "name": "location_annotations", + "type": "Extension", + "value": ["16"] + } + ] + } + return GeneDescriptor(**params) + + @pytest.fixture(scope='module') def 
num_sources(): """Get the number of sources.""" @@ -917,17 +1161,21 @@ def compare_gene_descriptor(test, actual): extensions_present = "extensions" in test.__fields__.keys() assert ("extensions" in actual.__fields__.keys()) == extensions_present if extensions_present: - assert len(actual.extensions) == len(test.extensions), \ - "len of extensions" + assert len(actual.extensions) == len(test.extensions), "len of extensions" n_ext_correct = 0 for test_ext in test.extensions: for actual_ext in actual.extensions: if actual_ext.name == test_ext.name: - assert isinstance(actual_ext.value, - type(test_ext.value)) + assert isinstance(actual_ext.value, type(test_ext.value)) if isinstance(test_ext.value, list): - assert set(actual_ext.value) == \ - set(test_ext.value), f"{test_ext.value} value" # noqa: E501 + if test_ext.value: + if isinstance(test_ext.value[0], dict): + assert actual_ext.value == test_ext.value + else: + assert set(actual_ext.value) == \ + set(test_ext.value), f"{test_ext.value} value" + else: + assert actual_ext.value == test_ext.value else: assert actual_ext.value == test_ext.value assert actual_ext.type == test_ext.type @@ -1281,6 +1529,17 @@ def test_normalize_single_entry(query_handler, normalized_loc_653303): expected_source_meta=[SourceName.NCBI.value]) +def test_normalize_no_locations(query_handler, normalized_ifnr): + """Test that the normalized endpoint correctly shapes merged entity with no + locations + """ + q = "IFNR" + resp = query_handler.normalize(q) + compare_normalize_resp( + resp, q, MatchType.SYMBOL, normalized_ifnr, + expected_source_meta=[SourceName.HGNC.value, SourceName.NCBI.value]) + + def test_normalize_unmerged(query_handler, normalize_unmerged_loc_653303, normalize_unmerged_chaf1a, normalize_unmerged_ache): """Test that unmerged normalization produces correct results."""