-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathKMER.py
43 lines (31 loc) · 1006 Bytes
/
KMER.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from itertools import product
def readFile(path="data/rosalind_kmer.txt"):
    """Return the sequence of the last record in a FASTA-style file.

    Lines starting with ">" are treated as record headers; every other line
    is sequence data for the most recent header, with trailing whitespace
    (including the newline) stripped before the pieces are concatenated.

    Args:
        path: Location of the input file. Defaults to the dataset path this
            script has always read from.

    Returns:
        The concatenated sequence of the last record in the file. Sequence
        lines that appear before any header are treated as an unnamed
        record, so a header-less file yields its whole content and an empty
        file yields "".
    """
    # Only the last record is ever returned, so it is enough to keep the
    # pieces of the record currently being read and restart at each header.
    pieces = []
    with open(path, "r") as f:
        for line in f:
            if line.startswith(">"):
                pieces = []
            else:
                pieces.append(line.rstrip())
    return "".join(pieces)
def countWithOverlapping(s, t):
    """Count the occurrences of ``t`` in ``s``, overlapping ones included.

    Unlike ``str.count``, the search resumes one character after the start
    of each match, so "AA" is found three times in "AAAA".

    Args:
        s: The string to search in.
        t: The substring to look for.

    Returns:
        The number of start positions in ``s`` at which ``t`` occurs.
    """
    occurrences = 0
    start = s.find(t)
    while start != -1:
        occurrences += 1
        # Step forward by a single character so overlapping matches count.
        start = s.find(t, start + 1)
    return occurrences
def countK_mers(sequence, k):
    """Return the k-mer composition of ``sequence`` over the alphabet ACGT.

    Args:
        sequence: The string whose k-mers are counted.
        k: Length of the k-mers.

    Returns:
        A list of 4**k integers: for every string of length ``k`` over
        'ACGT', in the order produced by ``product('ACGT', repeat=k)``, the
        number of (possibly overlapping) positions at which it occurs in
        ``sequence``.
    """
    # Tally each length-k window once instead of rescanning the whole
    # sequence for every one of the 4**k candidate k-mers.
    window_counts = {}
    for start in range(len(sequence) - k + 1):
        window = sequence[start:start + k]
        window_counts[window] = window_counts.get(window, 0) + 1
    return [
        window_counts.get("".join(kmer), 0)
        for kmer in product('ACGT', repeat=k)
    ]
if __name__ == "__main__":
    # Read the input sequence and compute its 4-mer counts.
    sequence = readFile()
    result = countK_mers(sequence, 4)
    # Write the counts as one space-separated line. The context manager
    # closes (and flushes) the output file even if the write fails.
    with open("results/KMER.txt", "w") as result_file:
        result_file.write(" ".join(str(r) for r in result))