|
| 1 | +"""Several files in this resource use HGNC gene symbols to identify individual |
| 2 | +genes. However, the symbols assigned to HGNC IDs can change over time, and |
| 3 | +therefore previously curated symbols can become invalid. This script |
| 4 | +generates a table of the current (i.e. at the time of running the script) |
| 5 | +mappings of HGNC symbols to IDs so that the assumptions about the identity |
| 6 | +of the genes in the various tables can be traced.""" |
| 7 | + |
| 8 | +import os |
| 9 | +import csv |
| 10 | +from indra.databases import hgnc_client |
| 11 | + |
def read_csv_rows(path):
    """Return all rows of a comma-separated file as lists of strings.

    Parameters
    ----------
    path : str
        Path of the CSV file to read.

    Returns
    -------
    list of list of str
        One list per line of the file; a blank line yields an empty list.
    """
    with open(path, 'r') as fh:
        # csv.reader ignores `lineterminator`, so it is not passed here.
        # str(u'...') keeps the arguments native strings on Python 2 and 3.
        reader = csv.reader(fh, delimiter=str(u','),
                            quoting=csv.QUOTE_MINIMAL,
                            quotechar=str(u'"'))
        return list(reader)


def collect_relations_symbols(rows):
    """Return the HGNC entries referenced by rows of relations.csv.

    Parameters
    ----------
    rows : iterable of list of str
        Rows with exactly five columns: namespace 1, ID 1, relation,
        namespace 2, ID 2. Empty rows (blank lines) are skipped.

    Returns
    -------
    set of str
        The IDs from either side of a relation whose namespace is 'HGNC'.

    Raises
    ------
    ValueError
        If a non-empty row does not have exactly five columns.
    """
    symbols = set()
    for row in rows:
        # A blank line is parsed as [], which would otherwise make the
        # unpacking below fail; malformed non-empty rows still raise.
        if not row:
            continue
        ns1, id1, _rel, ns2, id2 = row
        if ns1 == 'HGNC':
            symbols.add(id1)
        if ns2 == 'HGNC':
            symbols.add(id2)
    return symbols


def collect_grounding_map_symbols(rows):
    """Return the HGNC entries referenced by rows of grounding_map.csv.

    Parameters
    ----------
    rows : iterable of list of str
        Rows whose columns after the first are read as alternating
        namespace, ID pairs. The first column is ignored here
        (presumably the grounded text -- confirm against the file).

    Returns
    -------
    set of str
        Every ID that is paired with the namespace 'HGNC'.
    """
    symbols = set()
    for row in rows:
        # Odd columns hold namespaces, the following even columns the IDs.
        for ns, db_id in zip(row[1::2], row[2::2]):
            if ns == 'HGNC':
                symbols.add(db_id)
    return symbols


if __name__ == '__main__':
    path_this = os.path.dirname(os.path.abspath(__file__))

    # Gather all HGNC symbols from relations.csv
    relations_file = os.path.join(path_this, os.pardir, 'relations.csv')
    hgnc_symbols = collect_relations_symbols(read_csv_rows(relations_file))

    # Gather all HGNC symbols from grounding_map.csv
    gm_file = os.path.join(path_this, os.pardir, 'grounding_map.csv')
    hgnc_symbols |= collect_grounding_map_symbols(read_csv_rows(gm_file))

    # Create output file with one "symbol,HGNC ID" line per symbol,
    # sorted by symbol so that repeated runs produce comparable files.
    out_file = os.path.join(path_this, 'hgnc_symbol_map.csv')
    with open(out_file, 'w') as fh:
        for hgnc_symbol in sorted(hgnc_symbols):
            # NOTE(review): if get_hgnc_id returns None for a symbol it
            # does not know, the literal text "None" is written as the
            # ID -- confirm this is what consumers of the file expect.
            hgnc_id = hgnc_client.get_hgnc_id(hgnc_symbol)
            fh.write('%s,%s\r\n' % (hgnc_symbol, hgnc_id))
| 47 | + |
0 commit comments