Skip to content

Commit bcd9025

Browse files
committed
Add script to generate HGNC symbol/ID mapping
1 parent 334577e commit bcd9025

File tree

1 file changed

+47
-0
lines changed

1 file changed

+47
-0
lines changed

export/hgnc_ids.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
"""Several files in this resource use HGNC gene symbols to identify individual
2+
genes. However, the symbols assigned to HGNC IDs can change over time, and
3+
therefore previously curated symbols can become invalid. This script
4+
generates a table of the current (i.e. at the time of running the script)
5+
HGNC ID for each HGNC symbol used, so that the assumptions about the identity
6+
of the genes in the various tables can be traced."""
7+
8+
import os
9+
import csv
10+
from indra.databases import hgnc_client
11+
12+
if __name__ == '__main__':
    # Collect every HGNC symbol used in relations.csv and grounding_map.csv
    # (both expected one directory above this script) and write each symbol
    # next to the ID that hgnc_client returns for it to hgnc_symbol_map.csv
    # in this script's directory.
    path_this = os.path.dirname(os.path.abspath(__file__))
    hgnc_symbols = set()

    # Gather all HGNC symbols from relations.csv. Each row is unpacked as
    # five fields: namespace 1, id 1, relation, namespace 2, id 2.
    relations_file = os.path.join(path_this, os.pardir, 'relations.csv')
    with open(relations_file, 'r') as f:
        # Comma delimiter, double-quote quotechar and QUOTE_MINIMAL are the
        # csv module defaults, and csv.reader ignores lineterminator, so a
        # plain reader parses the file the same way.
        for row in csv.reader(f):
            if not row:
                # A blank line yields an empty row; skip it instead of
                # failing on the five-way unpacking below.
                continue
            ns1, id1, _rel, ns2, id2 = row
            if ns1 == 'HGNC':
                hgnc_symbols.add(id1)
            if ns2 == 'HGNC':
                hgnc_symbols.add(id2)

    # Gather all HGNC symbols from grounding_map.csv. Column 0 is skipped
    # and the remaining columns are read as alternating (namespace, id)
    # pairs.
    gm_file = os.path.join(path_this, os.pardir, 'grounding_map.csv')
    with open(gm_file, 'r') as f:
        for row in csv.reader(f):
            namespaces = row[1::2]
            entry_ids = row[2::2]
            hgnc_symbols.update(
                entry_id
                for ns, entry_id in zip(namespaces, entry_ids)
                if ns == 'HGNC'
            )

    # Create output file: one "symbol,id" line per symbol, sorted by symbol.
    # NOTE(review): under Python 3 on Windows, text mode turns the explicit
    # '\r\n' into '\r\r\n'; pass newline='' once Python 2 support is not
    # needed.
    out_file = os.path.join(path_this, 'hgnc_symbol_map.csv')
    with open(out_file, 'w') as fh:
        for hgnc_symbol in sorted(hgnc_symbols):
            # Whatever the client returns is written verbatim (presumably
            # None for a symbol that is no longer valid -- confirm against
            # hgnc_client).
            hgnc_id = hgnc_client.get_hgnc_id(hgnc_symbol)
            fh.write('%s,%s\r\n' % (hgnc_symbol, hgnc_id))
47+

0 commit comments

Comments
 (0)