-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathQCTableFormatter_V3.py
90 lines (81 loc) · 3.85 KB
/
QCTableFormatter_V3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import re
# Separator placed between the individual values when several of them are
# packed into a single cell of the output row:
delim = "|"
# Separator placed between the cells of the printed output row:
sep = ';'
def codonQCReformatter(inFile):
    """Collapse a codon QC table into three ``delim``-joined strings.

    Every non-blank line must contain a double-quoted name. The first two
    comma-separated fields that follow the quoted name are parsed as
    integers; going by the original variable names, the first is the
    non-coding count and the second the protein-coding count. Each count is
    converted to its share of the two counts' sum, rounded to 2 decimals.

    Args:
        inFile: Iterable of text lines (e.g. an open file).

    Returns:
        A 3-tuple of strings, each holding one value per input row joined by
        the module-level ``delim``: (names, protein-coding shares,
        non-coding shares). All three are empty strings for empty input.

    Raises:
        ValueError: If a non-blank line has no double-quoted name, or a
            count field is not an integer.
        IndexError: If fewer than two fields follow the quoted name.
    """
    # Greedy on purpose (same pattern as before): the name runs from the
    # first to the last double quote on the line.
    quotedName = re.compile('".*"')

    names = []
    codingShares = []
    notCodingShares = []
    for line in inFile:
        stripped = line.strip()
        if not stripped:
            # A trailing blank line should not abort the whole table.
            continue
        match = quotedName.search(stripped)
        if match is None:
            raise ValueError(
                "no double-quoted name found in line: {!r}".format(line))
        # Text after the quoted name starts with a comma, so index 0 of the
        # split is empty and the counts sit at indices 1 and 2.
        fields = stripped[match.end():].split(",")
        notCod = int(fields[1])
        protCod = int(fields[2])
        totCod = notCod + protCod

        names.append(match.group(0)[1:-1])
        if totCod == 0:
            # Neither share is defined when both counts are zero; report 0.0
            # for both instead of failing with ZeroDivisionError.
            notCodingShares.append(0.0)
            codingShares.append(0.0)
        else:
            notCodingShares.append(round(notCod / totCod, 2))
            codingShares.append(round(protCod / totCod, 2))

    return (
        delim.join(names),
        delim.join(str(share) for share in codingShares),
        delim.join(str(share) for share in notCodingShares),
    )
def NCountReformatter(Ns):
    """Collapse the first two CSV columns into two ``delim``-joined strings.

    The first comma-separated field of every non-blank line is collected as
    the percentage value and the second as the count value (naming follows
    the original variables). Values are kept as text; only trailing
    whitespace on the count is removed.

    Args:
        Ns: Iterable of text lines (e.g. an open file).

    Returns:
        A 2-tuple of strings ``(percentages, counts)``, each holding one
        value per input row joined by the module-level ``delim``. Both are
        empty strings for empty input.

    Raises:
        IndexError: If a non-blank line has no comma.
    """
    percentages = []
    counts = []
    for line in Ns:
        if not line.strip():
            # A trailing blank line should not abort the whole table.
            continue
        fields = line.split(',')
        percentages.append(fields[0])
        # The count may be the last field on the line and carry the newline.
        counts.append(fields[1].rstrip())
    return delim.join(percentages), delim.join(counts)
def ComplexityReformatter(Cmplx):
    """Turn the first two integer counts of a table into percentages.

    The second comma-separated field of each line is parsed as an integer;
    lines where that is not possible (for instance a header row or a line
    without a comma) are ignored. The first two integers found are expressed
    as percentages of their sum, rounded to 1 decimal. Going by the original
    variable names, the first is the "complex" count and the second the
    "not complex" count.

    Args:
        Cmplx: Iterable of text lines (e.g. an open file).

    Returns:
        A 2-tuple of floats ``(first_percent, second_percent)``. Both are
        0.0 when the two counts sum to zero.

    Raises:
        IndexError: If fewer than two lines carry an integer second field.
    """
    values = []
    for line in Cmplx:
        try:
            values.append(int(line.split(',')[1]))
        except (IndexError, ValueError):
            # Not a data row (no second field, or it is not an integer).
            continue

    first, second = values[0], values[1]
    total = first + second
    if total == 0:
        # Percentages are undefined for an empty total; report zeros rather
        # than failing with ZeroDivisionError.
        return 0.0, 0.0
    prctCplx = round((first / total) * 100, 1)
    prctNtCplx = round((second / total) * 100, 1)
    return prctCplx, prctNtCplx
def letterQuality(inFile):
    """Extract the second and third CSV fields of the A, C, G and T rows.

    A row is selected when its first comma-separated field is exactly
    ``A``, ``C``, ``G`` or ``T``; all other rows are ignored. If a letter
    occurs on several rows, the last one wins. Going by the original variable
    names, the second field is a count and the third a quality value. Both
    are returned as text; trailing whitespace is removed from the third
    field only.

    Args:
        inFile: Iterable of text lines (e.g. an open file).

    Returns:
        An 8-tuple of strings:
        ``(ct_a, qual_a, ct_c, qual_c, ct_g, qual_g, ct_t, qual_t)``.

    Raises:
        ValueError: If no row is present for one or more of the four letters.
        IndexError: If a selected row has fewer than three fields.
    """
    letters = ('A', 'C', 'G', 'T')
    found = {}
    for line in inFile:
        fields = line.split(',')
        if fields[0] in letters:
            # The third field may be the last on the line and carry the
            # newline, hence the rstrip.
            found[fields[0]] = (fields[1], fields[2].rstrip())

    missing = [letter for letter in letters if letter not in found]
    if missing:
        raise ValueError(
            "no row found for letter(s): " + ", ".join(missing))

    result = []
    for letter in letters:
        result.extend(found[letter])
    return tuple(result)
def inputGrabber(inFile):
    """Concatenate all lines of a text input into a single string.

    Trailing whitespace (including the newline) is removed from every line
    and the results are joined with nothing in between.

    NOTE(review): because no separator is inserted, an input with several
    non-empty lines yields one fused string -- confirm the ID file is
    expected to hold a single line.

    Args:
        inFile: Iterable of text lines (e.g. an open file).

    Returns:
        The concatenated string; empty for empty input.
    """
    return "".join(line.rstrip() for line in inFile)
# Read the five intermediate files, reformat each of them, and print one
# output row whose cells are separated by `sep`.
with open("TEMPIDFile.txt") as IDList, \
        open("TEMPcodonQCTable.csv") as CQC, \
        open("TEMPcountNsPercentageTable.csv") as NCT, \
        open("TEMPComplexityTable.csv") as Complexity, \
        open("TEMPLetterCountQuality.csv") as LCQ:
    IDs = inputGrabber(IDList)
    codonName, codonCoding, codonNotCoding = codonQCReformatter(CQC)
    NPercent, NCount = NCountReformatter(NCT)
    PercentComplex, PercentNotComplex = ComplexityReformatter(Complexity)
    (count_a, avg_quality_a,
     count_c, avg_quality_c,
     count_g, avg_quality_g,
     count_t, avg_quality_t) = letterQuality(LCQ)
    # Cell order is the output format: qualities are emitted A, T, G, C and
    # counts A, C, G, T.
    print(sep.join([
        IDs,
        codonName,
        codonCoding,
        codonNotCoding,
        NCount,
        NPercent,
        str(PercentComplex),
        str(PercentNotComplex),
        avg_quality_a,
        avg_quality_t,
        avg_quality_g,
        avg_quality_c,
        count_a,
        count_c,
        count_g,
        count_t,
    ]))