Skip to content

Commit

Permalink
Merge pull request #1450 from bgyori/proc_improve
Browse files: browse the repository at this point in the history
Processor improvements
  • Loading branch information
bgyori authored Jun 19, 2024
2 parents 4b7b562 + d36b079 commit 9624972
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 33 deletions.
14 changes: 7 additions & 7 deletions indra/assemblers/indranet/net.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,8 +327,8 @@ def _simple_scorer_update(G, edge):
# Catch underflow
except FloatingPointError as err:
# Numpy precision
NP_PRECISION = 10 ** -np.finfo(np.longfloat).precision
logger.warning('%s: Resetting ag_belief to 10*np.longfloat precision '
NP_PRECISION = 10 ** -np.finfo(np.longdouble).precision
logger.warning('%s: Resetting ag_belief to 10*np.longdouble precision '
'(%.0e)' % (err, Decimal(NP_PRECISION * 10)))
ag_belief = NP_PRECISION * 10
return ag_belief
Expand All @@ -337,14 +337,14 @@ def _simple_scorer_update(G, edge):
def _complementary_belief(G, edge):
# Aggregate belief score: 1-prod(1-belief_i)
np.seterr(all='raise')
NP_PRECISION = 10 ** -np.finfo(np.longfloat).precision # Numpy precision
NP_PRECISION = 10 ** -np.finfo(np.longdouble).precision # Numpy precision
belief_list = [s['belief'] for s in G.edges[edge]['statements']]
try:
ag_belief = np.longfloat(1.0) - np.prod(np.fromiter(
map(lambda belief: np.longfloat(1.0) - belief, belief_list),
dtype=np.longfloat))
ag_belief = np.longdouble(1.0) - np.prod(np.fromiter(
map(lambda belief: np.longdouble(1.0) - belief, belief_list),
dtype=np.longdouble))
except FloatingPointError as err:
logger.warning('%s: Resetting ag_belief to 10*np.longfloat precision '
logger.warning('%s: Resetting ag_belief to 10*np.longdouble precision '
'(%.0e)' % (err, Decimal(NP_PRECISION * 10)))
ag_belief = NP_PRECISION * 10
return ag_belief
13 changes: 12 additions & 1 deletion indra/databases/hgnc_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,10 @@ def get_hgnc_name_from_mgi_name(mgi_name: str) -> Union[str, None]:
def _read_hgnc_maps():
hgnc_file = get_resource_path("hgnc_entries.tsv")
csv_rows = read_unicode_csv(hgnc_file, delimiter='\t', encoding='utf-8')
hgnc_uniprot_preferred = get_resource_path("hgnc_uniprot_preferred.csv")
csv_rows_uniprot_preferred = \
read_unicode_csv(hgnc_uniprot_preferred, delimiter=',',
encoding='utf-8')
hgnc_names = {}
hgnc_ids = {}
hgnc_withdrawn = []
Expand Down Expand Up @@ -515,19 +519,26 @@ def _read_hgnc_maps():
for old_id, new_id in hgnc_withdrawn_new_ids.items():
hgnc_names[old_id] = hgnc_names[new_id]

uniprot_ids_preferred = {}
for row in csv_rows_uniprot_preferred:
hgnc_id = row[0]
uniprot_id = row[1]
uniprot_ids_preferred[hgnc_id] = uniprot_id

return (
hgnc_names, hgnc_ids, hgnc_withdrawn,
uniprot_ids, entrez_ids, entrez_ids_reverse, mouse_map, rat_map,
prev_sym_map, ensembl_ids, ensembl_ids_reverse, gene_types,
dict(hgnc_to_enzymes), dict(enzyme_to_hgncs),
uniprot_ids_preferred
)


(
hgnc_names, hgnc_ids, hgnc_withdrawn, uniprot_ids, entrez_ids,
entrez_ids_reverse, mouse_map, rat_map, prev_sym_map, ensembl_ids,
ensembl_ids_reverse, gene_type,
hgnc_to_enzymes, enzyme_to_hgncs,
hgnc_to_enzymes, enzyme_to_hgncs, uniprot_ids_preferred
) = _read_hgnc_maps()


Expand Down
12 changes: 8 additions & 4 deletions indra/ontology/bio/ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class BioOntology(IndraOntology):
# should be incremented to "force" rebuilding the ontology to be consistent
# with the underlying resource files.
name = 'bio'
version = '1.33'
version = '1.34'
ontology_namespaces = [
'go', 'efo', 'hp', 'doid', 'chebi', 'ido', 'mondo', 'eccode',
]
Expand Down Expand Up @@ -147,11 +147,15 @@ def add_hgnc_uniprot_entrez_xrefs(self):
from indra.databases import hgnc_client
from indra.databases import uniprot_client
edges = []
for hid, uid in hgnc_client.uniprot_ids.items():
uids = uid.split(', ')
for hid, upid in hgnc_client.uniprot_ids.items():
uids = upid.split(', ')
preferred = hgnc_client.uniprot_ids_preferred.get(hid)
if preferred:
uids = [preferred]
for uid in uids:
edge_data = {'type': 'xref', 'source': 'hgnc'}
edges.append((self.label('HGNC', hid), self.label('UP', uid),
{'type': 'xref', 'source': 'hgnc'}))
edge_data))
self.add_edges_from(edges)

edges = [(self.label('UP', uid), self.label('HGNC', hid),
Expand Down
3 changes: 3 additions & 0 deletions indra/resources/hgnc_uniprot_preferred.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
hgnc_id,uniprot_id
17868,Q9BXH1
30377,Q14160
21 changes: 5 additions & 16 deletions indra/sources/bel/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,28 +546,17 @@ def get_db_refs_by_name(ns, name, node_data):
if up_id:
db_refs = {'UP': up_id}
# Map Selventa families and complexes to FamPlex
elif ns == 'SFAM':
elif ns in {'SFAM', 'SCOMP'}:
try:
sfam_id, xrefs = selventa_lookup[('SFAM', name)]
db_refs = {"SFAM": sfam_id}
selventa_id, xrefs = selventa_lookup[(ns, name)]
db_refs = {ns: selventa_id}
indra_name = bel_to_indra.get(name)
except KeyError:
indra_name = None
db_refs = None

if indra_name is None:
logger.info('Could not find mapping for BEL/SFAM family: '
'%s (%s)' % (name, node_data))
else:
db_refs['FPLX'] = indra_name
name = indra_name
elif ns == 'SCOMP':
scomp_id, xrefs = selventa_lookup[('SCOMP', name)]
db_refs = {'SCOMP': scomp_id}
indra_name = bel_to_indra.get(name)
if indra_name is None:
logger.info('Could not find mapping for BEL/SCOMP complex: '
'%s (%s)' % (name, node_data))
logger.info('Could not find mapping for BEL/%s family: '
'%s (%s)' % (ns, name, node_data))
else:
db_refs['FPLX'] = indra_name
name = indra_name
Expand Down
8 changes: 4 additions & 4 deletions indra/tests/test_indranet_assembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,10 @@ def test_to_digraph():
'Activation', 'Phosphorylation', 'Inhibition', 'IncreaseAmount'}
assert all(digraph.edges[e].get('belief', False) for e in digraph.edges)
assert all(isinstance(digraph.edges[e]['belief'],
(float, np.longfloat)) for e in digraph.edges)
(float, np.longdouble)) for e in digraph.edges)
assert all(digraph.edges[e].get('weight', False) for e in digraph.edges)
assert all(isinstance(digraph.edges[e]['weight'],
(float, np.longfloat)) for e in digraph.edges)
(float, np.longdouble)) for e in digraph.edges)
digraph_from_df = IndraNet.digraph_from_df(df)
assert nx.is_isomorphic(digraph, digraph_from_df)

Expand Down Expand Up @@ -206,11 +206,11 @@ def test_to_signed_graph():
assert all(signed_graph.edges[e].get('belief', False) for e in
signed_graph.edges)
assert all(isinstance(signed_graph.edges[e]['belief'],
(float, np.longfloat)) for e in signed_graph.edges)
(float, np.longdouble)) for e in signed_graph.edges)
assert all(signed_graph.edges[e].get('weight', False) for e in
signed_graph.edges)
assert all(isinstance(signed_graph.edges[e]['weight'],
(float, np.longfloat)) for e in signed_graph.edges)
(float, np.longdouble)) for e in signed_graph.edges)


def _weight_mapping(G):
Expand Down
2 changes: 1 addition & 1 deletion indra/tests/test_pathfinding.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def _setup_unsigned_graph():
# Add belief
for e in dg.edges:
dg.edges[e]['belief'] = edge_beliefs[e]
dg.edges[e]['weight'] = -np.log(edge_beliefs[e], dtype=np.longfloat)
dg.edges[e]['weight'] = -np.log(edge_beliefs[e], dtype=np.longdouble)

# Add edge_by_hash
dg.graph['hashes'] = hashes
Expand Down

0 comments on commit 9624972

Please sign in to comment.