-
Notifications
You must be signed in to change notification settings - Fork 6
/
conv_blasr_samqv.py
executable file
·80 lines (60 loc) · 1.88 KB
/
conv_blasr_samqv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python
"""Converts blasr -printSAMQV SAM format as follows:
- use subst qual for base qual
- use qi as BI with offset
- use qd as BD with offset
"""
__author__ = "Andreas Wilm"
__version__ = "0.1"
__email__ = "[email protected]"
__license__ = "The MIT License (MIT)"
import sys
import os
from collections import OrderedDict
import pysam
def conv_read(r):
    """Rewrite the tags and base qualities of one blasr -printSAMQV read.

    The read is modified through attribute assignment:

    - ``BI`` / ``BD`` tags are added, derived from the ``qi`` / ``qd``
      tags shifted by one position and padded with a trailing ``'#'``.
    - the current ``r.qual`` is saved in a new ``qo`` tag.
    - ``r.qual`` is replaced by the value of the ``qs`` tag.
    - ``r.tags`` is replaced by the updated tag list (duplicate tag
      names are collapsed, the last value wins).

    Args:
        r: read object exposing ``tags`` (iterable of ``(name, value)``
            pairs) and ``qual`` attributes, e.g. a pysam read.

    Raises:
        KeyError: if any of the ``qi``, ``qd`` or ``qs`` tags is missing.
            The read is left untouched in that case.
    """
    # in-place editing of tags and qualities apparently not possible in
    # pysam (see also FAQ), so build a new tag collection and reassign it.
    # using a dict for convenience. also gets rid of ts duplicate
    dtags = OrderedDict(r.tags)

    # Fail early, and with a message naming the problem, before anything
    # on the read has been changed.
    missing = [name for name in ('qi', 'qd', 'qs') if name not in dtags]
    if missing:
        raise KeyError(
            "read is missing required tag(s): %s" % ", ".join(missing))

    # offset needed to make it BI and BD which are quals after current
    # base: take 1+ and add dummy at end
    dtags['BI'] = dtags['qi'][1:] + '#'
    dtags['BD'] = dtags['qd'][1:] + '#'

    # pacbio's base qualities are mergers of all qualities.
    # we want subst quals instead
    dtags['qo'] = r.qual  # save original
    r.qual = dtags['qs']

    # Materialise the items: on Python 3 dict.items() is a view, not a list.
    r.tags = list(dtags.items())
    # FIXME pysam bug: rg:z: to rg:a:
    # use set_tag instead
def main(sam_in, sam_out):
    """Convert every read of sam_in and write it to sam_out.

    Iterates over sam_in; each read is first passed to conv_read (which
    modifies it) and then handed to sam_out.write, in input order.
    """
    for read in sam_in:
        conv_read(read)
        sam_out.write(read)
if __name__ == "__main__":
    # Command-line entry point: <prog> IN OUT, where OUT may be "-".
    # Validation uses sys.exit rather than assert so that it still runs
    # under `python -O` and reports a readable message.
    if len(sys.argv) != 3:
        sys.exit("Usage: %s blasr-samqv-in.bam conv-out.bam" % (
            os.path.basename(sys.argv[0])))

    samfile_in = sys.argv[1]
    samfile_out = sys.argv[2]

    # Never overwrite an existing output file; "-" is exempt from the check.
    if samfile_out != "-" and os.path.exists(samfile_out):
        sys.exit("Refusing to overwrite existing file: %s" % samfile_out)

    sam_in = pysam.Samfile(samfile_in)
    try:
        # Output is always written in "wb" mode, using the input as template.
        sam_out = pysam.Samfile(samfile_out, "wb", template=sam_in)
        try:
            main(sam_in, sam_out)
        finally:
            # Close even if conversion fails part-way through.
            sam_out.close()
    finally:
        sam_in.close()