Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BioPAX grounding improvements #390

Merged
merged 13 commits into from
Feb 6, 2018
98 changes: 76 additions & 22 deletions indra/databases/chebi_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,6 @@
from functools32 import lru_cache
from indra.util import read_unicode_csv

def read_chebi_to_pubchem():
    """Load the ChEBI/PubChem lookup tables from the bundled TSV resource.

    Returns
    -------
    forward : dict
        Maps the first column of each row (ChEBI ID) to the second column
        (PubChem ID).
    reverse : dict
        The inverse mapping, from the second column to the first.
    """
    here = dirname(abspath(__file__))
    tsv_path = join(here, '../resources/chebi_to_pubchem.tsv')
    forward = {}
    reverse = {}
    for row in read_unicode_csv(tsv_path, delimiter='\t'):
        chebi_id, pubchem_id = row[0], row[1]
        # Later rows overwrite earlier ones when a key repeats
        forward[chebi_id] = pubchem_id
        reverse[pubchem_id] = chebi_id
    return forward, reverse

def read_chebi_to_chembl():
    """Load the ChEBI to ChEMBL lookup table from the bundled TSV resource.

    Returns
    -------
    dict
        Maps the first column of each row (ChEBI ID) to the second column
        (ChEMBL ID).
    """
    here = dirname(abspath(__file__))
    tsv_path = join(here, '../resources/chebi_to_chembl.tsv')
    rows = read_unicode_csv(tsv_path, delimiter='\t')
    return {row[0]: row[1] for row in rows}

# Build the lookup tables once, when the module is imported, so that the
# getter functions in this module only need to do plain dict lookups.
chebi_pubchem, pubchem_chebi = read_chebi_to_pubchem()
chebi_chembl = read_chebi_to_chembl()

def get_pubchem_id(chebi_id):
"""Return the PubChem ID corresponding to a given ChEBI ID.
Expand All @@ -47,6 +25,7 @@ def get_pubchem_id(chebi_id):
pubchem_id = chebi_pubchem.get(chebi_id)
return pubchem_id


def get_chebi_id_from_pubchem(pubchem_id):
"""Return the ChEBI ID corresponding to a given Pubchem ID.

Expand All @@ -64,5 +43,80 @@ def get_chebi_id_from_pubchem(pubchem_id):
chebi_id = pubchem_chebi.get(pubchem_id)
return chebi_id


def get_chembl_id(chebi_id):
    """Look up the ChEMBL ID that a ChEBI ID maps to.

    Parameters
    ----------
    chebi_id : str
        The ChEBI ID to look up.

    Returns
    -------
    chembl_id : str
        The ChEMBL ID mapped to the given ChEBI ID, or None if the
        ChEBI ID is not in the mapping table.
    """
    chembl_id = chebi_chembl.get(chebi_id)
    return chembl_id


def get_chebi_id_from_cas(cas_id):
    """Return a ChEBI ID corresponding to the given CAS ID.

    Parameters
    ----------
    cas_id : str
        The CAS ID to be converted.

    Returns
    -------
    chebi_id : str
        The ChEBI ID corresponding to the given CAS ID. If the lookup
        fails, None is returned.
    """
    return cas_chebi.get(cas_id)


def _read_chebi_to_pubchem():
    """Load the ChEBI/PubChem lookup tables from the bundled TSV resource.

    Returns
    -------
    forward : dict
        Maps the first column of each row (ChEBI ID) to the second column
        (PubChem ID).
    reverse : dict
        The inverse mapping, from the second column to the first.
    """
    here = dirname(abspath(__file__))
    tsv_path = join(here, '../resources/chebi_to_pubchem.tsv')
    forward = {}
    reverse = {}
    for row in read_unicode_csv(tsv_path, delimiter='\t'):
        chebi_id, pubchem_id = row[0], row[1]
        # Later rows overwrite earlier ones when a key repeats
        forward[chebi_id] = pubchem_id
        reverse[pubchem_id] = chebi_id
    return forward, reverse


def _read_chebi_to_chembl():
    """Load the ChEBI to ChEMBL lookup table from the bundled TSV resource.

    Returns
    -------
    dict
        Maps the first column of each row (ChEBI ID) to the second column
        (ChEMBL ID).
    """
    here = dirname(abspath(__file__))
    tsv_path = join(here, '../resources/chebi_to_chembl.tsv')
    rows = read_unicode_csv(tsv_path, delimiter='\t')
    return {row[0]: row[1] for row in rows}


def _read_cas_to_chebi():
    """Load the CAS to ChEBI lookup table from the bundled TSV resource.

    The first row of the file is discarded, and a few manually curated
    entries are added on top of the file contents.

    Returns
    -------
    dict
        Maps the first column of each remaining row (CAS ID) to the second
        column (ChEBI ID), plus the manually added entries.
    """
    here = dirname(abspath(__file__))
    tsv_path = join(here, '../resources/cas_to_chebi.tsv')
    rows = read_unicode_csv(tsv_path, delimiter='\t')
    # Discard the first row (presumably a header line)
    next(rows)
    mapping = {row[0]: row[1] for row in rows}
    # These are missing from the resource but appear often, so we map
    # them manually; they take precedence over entries read from the file
    mapping.update({
        '24696-26-2': '17761',
        '23261-20-3': '18035',
        '165689-82-7': '16618',
    })
    return mapping


# Build the lookup tables once, when the module is imported, so that the
# getter functions in this module only need to do plain dict lookups.
chebi_pubchem, pubchem_chebi = _read_chebi_to_pubchem()
chebi_chembl = _read_chebi_to_chembl()
cas_chebi = _read_cas_to_chebi()
4 changes: 4 additions & 0 deletions indra/resources/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ grounding_agents.json
- manually curated list of grounding mappings to INDRA Agents with states
(phosphorylation, mutation, etc.)

cas_to_chebi.tsv
- Manually curated based on common occurrences in Pathway Commons data. Could
be replaced by a more comprehensive map.

Files that don't need periodical updates
========================================
amino_acids.tsv
Expand Down
Loading