Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add miRBase groundings to GroundingMapper #918

Closed
wants to merge 10 commits into from
40 changes: 31 additions & 9 deletions indra/preassembler/grounding_mapper/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from copy import deepcopy
from indra.statements import Agent
from indra.databases import uniprot_client, hgnc_client, chebi_client, \
mesh_client, go_client
mesh_client, go_client, mirbase_client
from indra.util import read_unicode_csv

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -281,6 +281,7 @@ def standardize_db_refs(db_refs):
"""
up_id = db_refs.get('UP')
hgnc_id = db_refs.get('HGNC')
mirbase_id = db_refs.get('MIRBASE')
# If we have a UP ID and no HGNC ID, we try to get a gene name,
# and if possible, an HGNC ID from that
if up_id and not hgnc_id:
Expand All @@ -289,8 +290,12 @@ def standardize_db_refs(db_refs):
hgnc_id = hgnc_client.get_hgnc_id(gene_name)
if hgnc_id:
db_refs['HGNC'] = hgnc_id
# Otherwise, if we don't have a UP ID but have an HGNC ID, we try to
# get the UP ID
elif mirbase_id and not hgnc_id:
hgnc_id = mirbase_client.get_hgnc_id_from_mirbase_id(mirbase_id)
if hgnc_id:
db_refs['HGNC'] = hgnc_id
# Otherwise, if we don't have a UP ID or miRBase ID but have an HGNC
# ID, we try to get the UP ID or miRBase ID
elif hgnc_id:
# Now get the Uniprot ID for the gene
mapped_up_id = hgnc_client.get_uniprot_id(hgnc_id)
Expand All @@ -314,6 +319,17 @@ def standardize_db_refs(db_refs):
else:
db_refs['UP'] = mapped_up_id

mapped_mirbase_id = \
mirbase_client.get_mirbase_id_from_hgnc_id(hgnc_id)
if mapped_mirbase_id:
if mirbase_id and mapped_mirbase_id != mirbase_id:
msg = 'Inconsistent groundings MIRBASE:%s not equal to ' \
'MIRBASE:%s mapped from HGNC:%s, standardizing ' \
'to MIRBASE:%s'
logger.debug(msg, mirbase_id, mapped_mirbase_id,
hgnc_id, mapped_mirbase_id)
db_refs['MIRBASE'] = mapped_mirbase_id

# Now try to improve chemical groundings
pc_id = db_refs.get('PUBCHEM')
chebi_id = db_refs.get('CHEBI')
Expand Down Expand Up @@ -364,8 +380,8 @@ def standardize_db_refs(db_refs):
# further conflict to resolve.
return db_refs

@staticmethod
def standardize_agent_name(agent, standardize_refs=True):
@classmethod
def standardize_agent_name(cls, agent, standardize_refs=True):
"""Standardize the name of an Agent based on grounding information.

If an agent contains a FamPlex grounding, the FamPlex ID is used as a
Expand All @@ -391,7 +407,7 @@ def standardize_agent_name(agent, standardize_refs=True):
return

if standardize_refs:
agent.db_refs = GroundingMapper.standardize_db_refs(agent.db_refs)
agent.db_refs = cls.standardize_db_refs(agent.db_refs)

# We next look for prioritized grounding, if missing, we return
db_ns, db_id = agent.get_grounding()
Expand All @@ -405,6 +421,12 @@ def standardize_agent_name(agent, standardize_refs=True):
# get_grounding returns
elif db_ns == 'HGNC':
agent.name = hgnc_client.get_hgnc_name(db_id)
elif db_ns == 'MIRBASE':
mirbase_id = agent.db_refs['MIRBASE']
mirbase_name = \
mirbase_client.get_mirbase_name_from_mirbase_id(mirbase_id)
if mirbase_name:
agent.name = mirbase_name
elif db_ns == 'UP':
# Try for the gene name
gene_name = uniprot_client.get_gene_name(agent.db_refs['UP'],
Expand All @@ -426,8 +448,8 @@ def standardize_agent_name(agent, standardize_refs=True):
agent.name = go_name
return

@staticmethod
def rename_agents(stmts):
@classmethod
def rename_agents(cls, stmts):
"""Return a list of mapped statements with updated agent names.

Creates a new list of statements without modifying the original list.
Expand All @@ -448,7 +470,7 @@ def rename_agents(stmts):
for _, stmt in enumerate(mapped_stmts):
# Iterate over the agents
for agent in stmt.agent_list():
GroundingMapper.standardize_agent_name(agent, True)
cls.standardize_agent_name(agent, True)
return mapped_stmts


Expand Down
3 changes: 3 additions & 0 deletions indra/statements/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ def get_grounding(self):
if isinstance(hgnc, list):
hgnc = hgnc[0]
return 'HGNC', str(hgnc)
mirbase = self.db_refs.get('MIRBASE')
if mirbase:
return 'MIRBASE', mirbase
up = self.db_refs.get('UP')
if up:
if isinstance(up, list):
Expand Down
13 changes: 13 additions & 0 deletions indra/tests/test_groundingmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,19 @@ def test_map_standardize_up_hgnc():
assert st.enz.db_refs['UP'] == st.sub.db_refs['UP']


def test_map_standardize_mirbase_hgnc():
    """Check that HGNC-only and MIRBASE-only agents map to the same result.

    One agent starts with only an 'HGNC' entry in its db_refs and the other
    with only a 'MIRBASE' entry. After the statement holding both is passed
    through ``gm.map_stmts``, both agents are expected to carry the same
    name, the same 'HGNC' ID and the same 'MIRBASE' ID.
    """
    hgnc_only = Agent('MIRLET7A1', db_refs={'HGNC': '31476'})
    mirbase_only = Agent('hsa-let-7a-1', db_refs={'MIRBASE': 'MI0000060'})
    # The statement is not a real one; it only serves to carry the two
    # agents through the mapper
    results = gm.map_stmts([Phosphorylation(hgnc_only, mirbase_only)])
    assert len(results) == 1
    mapped = results[0]
    # Enzyme first, then substrate, mirroring the order of the comparisons
    agents = (mapped.enz, mapped.sub)
    assert all('MIRLET7A1' == ag.name for ag in agents)
    assert all('31476' == ag.db_refs['HGNC'] for ag in agents)
    assert all('MI0000060' == ag.db_refs['MIRBASE'] for ag in agents)


def test_map_standardize_chebi_pc():
a1 = Agent('X', db_refs={'PUBCHEM': '42611257'})
a2 = Agent('Y', db_refs={'CHEBI': 'CHEBI:63637'})
Expand Down