diff --git a/indra/preassembler/grounding_mapper/mapper.py b/indra/preassembler/grounding_mapper/mapper.py index d3c23ce76b..5ab35bec88 100644 --- a/indra/preassembler/grounding_mapper/mapper.py +++ b/indra/preassembler/grounding_mapper/mapper.py @@ -8,7 +8,7 @@ from copy import deepcopy from indra.statements import Agent from indra.databases import uniprot_client, hgnc_client, chebi_client, \ - mesh_client, go_client + mesh_client, go_client, mirbase_client from indra.util import read_unicode_csv logger = logging.getLogger(__name__) @@ -281,6 +281,7 @@ def standardize_db_refs(db_refs): """ up_id = db_refs.get('UP') hgnc_id = db_refs.get('HGNC') + mirbase_id = db_refs.get('MIRBASE') # If we have a UP ID and no HGNC ID, we try to get a gene name, # and if possible, a HGNC ID from that if up_id and not hgnc_id: @@ -289,8 +290,12 @@ def standardize_db_refs(db_refs): hgnc_id = hgnc_client.get_hgnc_id(gene_name) if hgnc_id: db_refs['HGNC'] = hgnc_id - # Otherwise, if we don't have a UP ID but have an HGNC ID, we try to - # get the UP ID + elif mirbase_id and not hgnc_id: + hgnc_id = mirbase_client.get_hgnc_id_from_mirbase_id(mirbase_id) + if hgnc_id: + db_refs['HGNC'] = hgnc_id + # Otherwise, if we don't have a UP ID or miRBase ID but have an HGNC + # ID, we try to get the UP ID or miRBase ID elif hgnc_id: # Now get the Uniprot ID for the gene mapped_up_id = hgnc_client.get_uniprot_id(hgnc_id) @@ -314,6 +319,17 @@ def standardize_db_refs(db_refs): else: db_refs['UP'] = mapped_up_id + mapped_mirbase_id = \ + mirbase_client.get_mirbase_id_from_hgnc_id(hgnc_id) + if mapped_mirbase_id: + if mirbase_id and mapped_mirbase_id != mirbase_id: + msg = 'Inconsistent groundings MIRBASE:%s not equal to ' \ + 'MIRBASE:%s mapped from HGNC:%s, standardizing ' \ + 'to MIRBASE:%s' + logger.debug(msg, mirbase_id, mapped_mirbase_id, + hgnc_id, mapped_mirbase_id) + db_refs['MIRBASE'] = mapped_mirbase_id + # Now try to improve chemical groundings pc_id = db_refs.get('PUBCHEM') chebi_id = db_refs.get('CHEBI') @@ -364,8 +380,8 @@ def standardize_db_refs(db_refs): # further conflict to resolve. return db_refs - @staticmethod - def standardize_agent_name(agent, standardize_refs=True): + @classmethod + def standardize_agent_name(cls, agent, standardize_refs=True): """Standardize the name of an Agent based on grounding information. If an agent contains a FamPlex grounding, the FamPlex ID is used as a @@ -391,7 +407,7 @@ def standardize_agent_name(agent, standardize_refs=True): return if standardize_refs: - agent.db_refs = GroundingMapper.standardize_db_refs(agent.db_refs) + agent.db_refs = cls.standardize_db_refs(agent.db_refs) # We next look for prioritized grounding, if missing, we return db_ns, db_id = agent.get_grounding() @@ -405,6 +421,12 @@ def standardize_agent_name(agent, standardize_refs=True): # get_grounding returns elif db_ns == 'HGNC': agent.name = hgnc_client.get_hgnc_name(db_id) + elif db_ns == 'MIRBASE': + mirbase_id = agent.db_refs['MIRBASE'] + mirbase_name = \ + mirbase_client.get_mirbase_name_from_mirbase_id(mirbase_id) + if mirbase_name: + agent.name = mirbase_name elif db_ns == 'UP': # Try for the gene name gene_name = uniprot_client.get_gene_name(agent.db_refs['UP'], @@ -426,8 +448,8 @@ def standardize_agent_name(agent, standardize_refs=True): agent.name = go_name return - @staticmethod - def rename_agents(stmts): + @classmethod + def rename_agents(cls, stmts): """Return a list of mapped statements with updated agent names. Creates a new list of statements without modifying the original list. @@ -448,7 +470,7 @@ def rename_agents(stmts): for _, stmt in enumerate(mapped_stmts): # Iterate over the agents for agent in stmt.agent_list(): - GroundingMapper.standardize_agent_name(agent, True) + cls.standardize_agent_name(agent, True) return mapped_stmts diff --git a/indra/statements/agent.py b/indra/statements/agent.py index 8125fc4e8a..ad8b7f9274 100644 --- a/indra/statements/agent.py +++ b/indra/statements/agent.py @@ -131,6 +131,9 @@ def get_grounding(self): if isinstance(hgnc, list): hgnc = hgnc[0] return 'HGNC', str(hgnc) + mirbase = self.db_refs.get('MIRBASE') + if mirbase: + return 'MIRBASE', mirbase up = self.db_refs.get('UP') if up: if isinstance(up, list): diff --git a/indra/tests/test_groundingmapper.py b/indra/tests/test_groundingmapper.py index f7e70b46e3..05880674b8 100644 --- a/indra/tests/test_groundingmapper.py +++ b/indra/tests/test_groundingmapper.py @@ -30,6 +30,19 @@ def test_map_standardize_up_hgnc(): assert st.enz.db_refs['UP'] == st.sub.db_refs['UP'] +def test_map_standardize_mirbase_hgnc(): + a1 = Agent('MIRLET7A1', db_refs={'HGNC': '31476'}) + a2 = Agent('hsa-let-7a-1', db_refs={'MIRBASE': 'MI0000060'}) + stmt = Phosphorylation(a1, a2) # not real statement + mapped_stmts = gm.map_stmts([stmt]) + assert len(mapped_stmts) == 1 + st = mapped_stmts[0] + assert 'MIRLET7A1' == st.enz.name == st.sub.name + assert '31476' == st.enz.db_refs['HGNC'] == st.sub.db_refs['HGNC'] + assert 'MI0000060' == st.enz.db_refs['MIRBASE'] \ + == st.sub.db_refs['MIRBASE'] + + def test_map_standardize_chebi_pc(): a1 = Agent('X', db_refs={'PUBCHEM': '42611257'}) a2 = Agent('Y', db_refs={'CHEBI': 'CHEBI:63637'})