Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update MeSH predictions and curations #168

Merged
merged 9 commits into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion scripts/import_gilda_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import os
from typing import Iterable

from biomappings import load_false_mappings, load_mappings
from biomappings.resources import PredictionTuple, append_prediction_tuples
from biomappings.utils import get_script_url

Expand All @@ -26,6 +27,8 @@
"NCIT": "ncit",
"GO": "go",
"FPLX": "fplx",
"UP": "uniprot",
"MESH": "mesh",
}


Expand All @@ -45,16 +48,44 @@ def get_primary_mappings():
return mappings


def get_curated_mappings():
    """Collect curated mappings as a set of direction-agnostic lookup keys.

    Every record returned by ``load_mappings()`` and ``load_false_mappings()``
    contributes two 4-tuples of the form
    ``(prefix, identifier, prefix, identifier)``: one in the recorded
    source -> target order and one with the two ends swapped, so a
    membership test succeeds whichever way round a pair is queried.

    :returns: A set of 4-tuples of prefixes and identifiers.
    """
    seen = set()
    for record in load_mappings() + load_false_mappings():
        source_end = (record["source prefix"], record["source identifier"])
        target_end = (record["target prefix"], record["target identifier"])
        # Store both orientations of the same pair.
        seen.add(source_end + target_end)
        seen.add(target_end + source_end)
    return seen


def get_mappings() -> Iterable[PredictionTuple]:
"""Iterate lexical mappings from Gilda."""
url = get_script_url(__file__)
mapping_type = "semapv:LexicalMatching"
match_type = "skos:exactMatch"
confidence = 0.95
primary_mappings = get_primary_mappings()
curated_mappings = get_curated_mappings()
with open(GILDA_MAPPINGS, "r") as fh:
for _, mesh_id, mesh_name, db_ns, db_id, db_name in csv.reader(fh, delimiter="\t"):
if ("mesh", mesh_id, db_ns, db_id) in primary_mappings:
if ("mesh", mesh_id, db_ns_mappings[db_ns], db_id) in primary_mappings or (
"mesh",
mesh_id,
db_ns_mappings[db_ns],
db_id,
) in curated_mappings:
continue
yield PredictionTuple(
"mesh",
Expand Down
5 changes: 5 additions & 0 deletions src/biomappings/resources/incorrect.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,9 @@ mesh D000068256 Darbepoetin alfa skos:exactMatch hgnc 4392 GNAS semapv:ManualMap
mesh D000068437 Pemetrexed skos:exactMatch chebi CHEBI:17509 5'-S-methyl-5'-thioadenosine semapv:ManualMappingCuration orcid:0000-0003-4423-4370
mesh D000068800 Etanercept skos:exactMatch hgnc 11917 TNFRSF1B semapv:ManualMappingCuration orcid:0000-0003-4423-4370
mesh D000070636 Rotator Cuff Injuries skos:exactMatch efo 1001250 rotator cuff tear semapv:ManualMappingCuration orcid:0000-0003-4423-4370
mesh D000071071 Microaneurysm skos:exactMatch hp HP:0032416 Retinal microaneurysm semapv:ManualMappingCuration orcid:0000-0001-9439-5346
mesh D000071636 Protein Phosphatase 2C skos:exactMatch hgnc 9279 PDP1 semapv:ManualMappingCuration orcid:0000-0001-9439-5346
mesh D000071960 Breast Carcinoma In Situ skos:exactMatch ncit C3641 Stage 0 Breast Cancer AJCC v6 and v7 semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/a80ed2/scripts/import_gilda_mappings.py 0.95
mesh D000074767 Diapause skos:exactMatch go GO:0030431 sleep semapv:ManualMappingCuration orcid:0000-0003-4423-4370
mesh D000077190 Interferon alpha-2 skos:exactMatch hgnc 5423 IFNA2 semapv:ManualMappingCuration orcid:0000-0003-4423-4370
mesh D000077212 Ropivacaine skos:exactMatch chebi CHEBI:8890 (S)-ropivacaine semapv:ManualMappingCuration orcid:0000-0003-4423-4370
Expand Down Expand Up @@ -866,13 +868,16 @@ mesh D054467 Phospholipases A2 skos:exactMatch hgnc 9030 PLA2G1B semapv:ManualMa
mesh D054629 Genome, Mitochondrial skos:exactMatch go GO:0000262 mitochondrial chromosome semapv:ManualMappingCuration orcid:0000-0003-4423-4370
mesh D054740 Dendritic Cell Sarcoma, Follicular skos:exactMatch doid DOID:7849 dendritic cell sarcoma semapv:ManualMappingCuration orcid:0000-0003-1307-2508
mesh D054818 Hexosaminidase A skos:exactMatch go GO:0004563 beta-N-acetylhexosaminidase activity semapv:ManualMappingCuration orcid:0000-0003-4423-4370
mesh D055607 Receptors, Natural Killer Cell skos:exactMatch hgnc 6378 KLRD1 semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/5c44c0/scripts/import_gilda_mappings.py 0.95
mesh D055752 Small Cell Lung Carcinoma skos:exactMatch doid DOID:5411 lung oat cell carcinoma semapv:ManualMappingCuration orcid:0000-0003-1307-2508
mesh D057135 Wet Macular Degeneration skos:exactMatch doid DOID:10873 Kuhnt-Junius degeneration semapv:ManualMappingCuration orcid:0000-0003-1307-2508
mesh D058494 Walker-Warburg Syndrome skos:exactMatch doid DOID:0111237 congenital muscular dystrophy-dystroglycanopathy type A1 semapv:ManualMappingCuration orcid:0000-0003-1307-2508
mesh D058570 TOR Serine-Threonine Kinases skos:exactMatch hgnc 3942 MTOR semapv:ManualMappingCuration orcid:0000-0001-9439-5346
mesh D063807 Dandruff skos:exactMatch doid DOID:8941 seborrheic infantile dermatitis semapv:ManualMappingCuration orcid:0000-0001-9439-5346
mesh D063847 Mean Platelet Volume skos:exactMatch ncit C74730 Mean Platelet Volume Measurement semapv:ManualMappingCuration orcid:0000-0001-9439-5346
mesh D063948 Enslaved Persons skos:exactMatch ncit C153898 Slavey Language semapv:ManualMappingCuration orcid:0000-0001-9439-5346
mesh D064046 Secretagogins skos:exactMatch hgnc 16941 SCGN semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/5c44c0/scripts/import_gilda_mappings.py 0.95
mesh D064429 Fatty Acid Synthases skos:exactMatch hgnc 3594 FASN semapv:ManualMappingCuration orcid:0000-0001-9439-5346 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/5c44c0/scripts/import_gilda_mappings.py 0.95
mesh D064697 Racemethionine skos:exactMatch chebi CHEBI:16811 methionine semapv:ManualMappingCuration orcid:0000-0003-4423-4370
mesh D065627 Familial Primary Pulmonary Hypertension skos:exactMatch doid DOID:14557 primary pulmonary hypertension semapv:ManualMappingCuration orcid:0000-0003-1307-2508
mesh D065637 Cytochrome P-450 CYP2A6 skos:exactMatch hgnc 2610 CYP2A6 semapv:ManualMappingCuration orcid:0000-0003-1307-2508
Expand Down
Loading
Loading