-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcollate_mapping_stats.py
executable file
·68 lines (46 loc) · 1.83 KB
/
collate_mapping_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/bin/env python
import sys
import re
import os
# Unused by the code below; retained verbatim from the original script.
example = """
Total number of reads in lane=243469299
Total number of good barcoded reads=199171115
"""

# Raw strings so the regex escapes (\. and \d) reach the regex engine intact
# instead of being treated as (invalid) string escape sequences.
_TRIMMED_BWA_INFIX = re.compile(r"\.txt\.gz\.fastq\.s\.\d+\.trimmed\.fastq\.bwa")
_B10_STATS_SUFFIX = re.compile(r"\.B10\.stats")


def sample_ref_from_filename(filename):
    """Return the sample/reference label encoded in a stats file name.

    The directory part is dropped, then the
    ".txt.gz.fastq.s.<digits>.trimmed.fastq.bwa" and ".B10.stats" pieces are
    removed, e.g.

    SQ0788_CCVK0ANXX_s_1_fastq.txt.gz.fastq.s.00005.trimmed.fastq.bwa.CELA_all_but_U.fa.B10.stats
        -> SQ0788_CCVK0ANXX_s_1_fastq.CELA_all_but_U.fa
    """
    label = _TRIMMED_BWA_INFIX.sub("", os.path.basename(filename))
    return _B10_STATS_SUFFIX.sub("", label)


def parse_mapping_stats(records):
    """Extract [mapped_count, total_count, mapped_fraction] from stats lines.

    records is any iterable of text lines (an open file works). Only lines
    with at least five whitespace-separated tokens are considered:

    * 4th and 5th tokens "in" "total" -> 1st token is the total count
    * 4th token "mapped"              -> 1st token is the mapped count

    Reading stops at the first "mapped" line. The fraction is mapped/total,
    or 0 when no positive total has been seen by then. If no line matches,
    [0, 0, 0] is returned.

    NOTE(review): the token positions look like samtools-flagstat style lines
    ("<n> + <m> in total ...", "<n> + <m> mapped ...") - confirm against the
    tool that produces the *.stats files. The example in the original header
    comment ("Mapped reads: 260087 (70.3575%)") has fewer than five tokens
    per line, so lines of that shape are ignored by this parser.
    """
    map_stats = [0, 0, 0]  # will contain count, total, percent
    for record in records:
        tokens = record.split()
        if len(tokens) < 5:
            continue
        if (tokens[3], tokens[4]) == ("in", "total"):
            map_stats[1] = float(tokens[0])
        elif tokens[3] == "mapped":
            map_stats[0] = float(tokens[0])
            if map_stats[1] > 0:
                map_stats[2] = map_stats[0] / map_stats[1]
            else:
                map_stats[2] = 0
            # Nothing after the first "mapped" line is needed.
            break
    return map_stats


def format_stats_row(sample_ref, map_stats):
    """Build one output row: [sample_ref, map_pct, map_std] as strings.

    map_pct is the mapped fraction times 100. map_std is
    sqrt(p * (1 - p) / n) times 100, where p is the mapped fraction and n the
    total count; it is 0.0 when n is not positive.
    """
    p, n = map_stats[2], map_stats[1]
    stddev = 0.0
    if n > 0:
        stddev = (p * (1 - p) / n) ** .5
    return [sample_ref, str(p * 100.0), str(stddev * 100.0)]


def main(argv=None):
    """Collate mapping stats from the given files into a TSV on stdout.

    argv is the list of stats file paths; it defaults to sys.argv[1:].
    All files are read before anything is printed, so an unreadable file
    raises before any output is produced. If two files reduce to the same
    sample_ref, the later one replaces the earlier one.
    """
    if argv is None:
        argv = sys.argv[1:]

    stats_dict = {}
    for filename in argv:
        with open(filename, "r") as f:
            stats_dict[sample_ref_from_filename(filename)] = parse_mapping_stats(f)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("\t".join(("sample_ref", "map_pct", "map_std")))
    for sample_ref in stats_dict:
        print("\t".join(format_stats_row(sample_ref, stats_dict[sample_ref])))


if __name__ == "__main__":
    main()