-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathLiftMap.py
143 lines (126 loc) · 4.04 KB
/
LiftMap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# Code taken from https://genome.sph.umich.edu/wiki/LiftMap.py
#!/usr/bin/python
import sys, os
def usage():
print("%s -m old.map -p old.ped -d old.dat -o new.prefix" % sys.argv[0] )
print("lift old.map (optionally .ped and .dat) to new.prefix.map (.ped/.dat if possible)")
def die(msg):
print msg
sys.exit(2)
def myopen(fn):
import gzip
try:
h = gzip.open(fn)
ln = h.read(2) # read arbitrary bytes so check if @param fn is a gzipped file
except:
# cannot read in gzip format
return open(fn)
h.close()
return gzip.open(fn)
def map2bed(fin, fout):
fo = open(fout, 'w')
for ln in myopen(fin):
chrom, rs, mdist, pos = ln.split()
chrom = 'chr' + chrom
pos = int(pos)
fo.write('%s\t%d\t%d\t%s\n' % (chrom, pos-1, pos, rs))
fo.close()
return True
# global var:
LIFTED_SET = set()
UNLIFTED_SET = set()
def liftBed(fin, fout, funlifted):
params = dict()
params['LIFTOVER_BIN'] = '/usr/local/bin/liftOver'
params['OLD'] = fin
params['CHAIN'] = '/home/ryan/Data/liftover/hg18ToHg19.over.chain.gz'
params['NEW'] = fout
params['UNLIFTED'] = fout + '.unlifted'
from string import Template
cmd = Template('$LIFTOVER_BIN $OLD $CHAIN $NEW $UNLIFTED')
cmd = cmd.substitute(params)
os.system(cmd)
#record lifted/unliftd rs
for ln in myopen(params['UNLIFTED']):
if len(ln) == 0 or ln[0] == '#':continue
UNLIFTED_SET.add(ln.strip().split()[-1])
for ln in myopen(params['NEW']):
if len(ln) == 0 or ln[0] == '#':continue
LIFTED_SET.add(ln.strip().split()[-1])
return True
def bed2map(fin, fout):
fo = open(fout, 'w')
for ln in myopen(fin):
chrom, pos0, pos1, rs = ln.split()
chrom = chrom.replace('chr', '')
fo.write('%s\t%s\t0.0\t%s\n' % (chrom, rs, pos1))
fo.close()
return True
def liftDat(fin, fout):
fo = open(fout, 'w')
for ln in myopen(fin):
if len(ln) == 0 or ln[0] != 'M':
fo.write(ln)
else:
t, rs = ln.strip().split()
if rs in LIFTED_SET:
fo.write(ln)
fo.close()
return True
def liftPed(fin, fout, fOldMap):
# two ways to do it:
# 1. write unlifted snp list
# use PLINK to do this job using --exclude
# 2. alternatively, we can write our own method
# we will use method 2
marker = [i.strip().split()[1] for i in open(fOldMap)]
flag = map(lambda x: x not in UNLIFTED_SET, marker)
# print marker[:10]
# print flag[:10]
fo = open(fout, 'w')
for ln in myopen(fin):
f = ln.strip().split()
l = len(f)
f = f[:6] + [ f[i*2] + ' '+f[i*2 +1] for i in xrange(3, l/2 )]
fo.write('\t'.join(f[:6]))
fo.write('\t')
if len(f[6:]) != len(flag):
die('Inconsistent length of ped and map files')
newMarker = [m for i, m in enumerate(f[6:]) if flag[i]]
fo.write('\t'.join(newMarker))
fo.write('\n')
#print marker[:10]
#die('test')
return True
def makesure(result, succ_msg, fail_msg = "ERROR"):
if result:
print 'SUCC: ', succ_msg
else:
print 'FAIL: ', fail_msg
sys.exit(2)
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser(description='')
parser.add_argument('-m', dest='mapFile', required = True)
parser.add_argument('-p', dest='pedFile')
parser.add_argument('-d', dest='datFile')
parser.add_argument('-o', dest='prefix', required = True)
args = parser.parse_args()
oldBed = args.mapFile + '.bed'
makesure(map2bed(args.mapFile, oldBed),
'map->bed succ')
newBed = args.prefix + '.bed'
unlifted = args.prefix + '.unlifted'
makesure(liftBed(oldBed, newBed, unlifted),
'liftBed succ')
newMap = args.prefix + '.map'
makesure(bed2map(newBed, newMap),
'bed->map succ')
if args.datFile:
newDat = args.prefix + '.dat'
makesure(liftDat(args.datFile, newDat),
'liftDat succ')
if args.pedFile:
newPed = args.prefix + '.ped'
makesure(liftPed(args.pedFile, newPed, args.mapFile),
'liftPed succ')