-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpatternClumps.py
64 lines (46 loc) · 1.22 KB
/
patternClumps.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import sys
import webbrowser
def getKmers(text, k):
    """Return the distinct substrings of length k that occur in text.

    Every start index from 0 through len(text) - k is sliced out of text and
    duplicates are collapsed through a set, so the order of the returned
    list is arbitrary. When k is larger than len(text) the result is empty.
    """
    last_start = len(text) - k
    unique = {text[start:start + k] for start in range(last_start + 1)}
    return list(unique)
def findpos(text, pattern):
    """Return the start indices of every occurrence of pattern in text.

    Overlapping occurrences are reported, e.g. "ATA" occurs in "ATATA" at
    indices 0 and 2. The indices are returned in ascending order without
    duplicates.

    Args:
        text: the string to search in.
        pattern: the substring to look for.

    Returns:
        A list of ints. It is empty when pattern does not occur in text
        (the previous implementation returned [-1] in that case). An empty
        pattern matches at every index from 0 through len(text), following
        str.find semantics (the previous implementation never terminated
        for an empty pattern).
    """
    positions = []
    # Search from an advancing offset rather than re-slicing text after each
    # hit; slicing copied the remainder of the string on every iteration.
    hit = text.find(pattern)
    while hit >= 0:
        positions.append(hit)
        # Advance by one character, not len(pattern), so that overlapping
        # occurrences are still found.
        hit = text.find(pattern, hit + 1)
    return positions
def _forms_clump(occurrences, k, L, t):
    """Return True if some t consecutive occurrences fit in a window of length L."""
    # The window must contain the whole of the last k-mer, so the distance
    # between the first and the t-th start index may be at most L - k
    # (comparing against L alone accepts windows that are k characters too wide).
    max_span = L - k
    # range() is empty when there are fewer than t occurrences.
    for first in range(len(occurrences) - t + 1):
        if occurrences[first + t - 1] - occurrences[first] <= max_span:
            return True
    return False


def main(filename):
    """Find the k-mers that form (L, t)-clumps in a text and write them out.

    The input file is read as whitespace-separated fields, in this order:
    the text, k (k-mer length), L (window length) and t (minimum number of
    occurrences inside one window).

    Each matching k-mer is collected once. The list is printed, written
    space-separated to 'ANS_pattern_clumps.txt' in the current working
    directory, and that file is then opened with webbrowser.open.

    Args:
        filename: path of the input file.

    Raises:
        IndexError: if the file holds fewer than four fields.
        ValueError: if k, L or t is not an integer literal.
    """
    # Close the input handle deterministically instead of leaking it.
    with open(filename) as source:
        filedata = source.read().split()
    text = filedata[0]
    k = int(filedata[1])
    L = int(filedata[2])
    t = int(filedata[3])

    kmerClumps = []
    for kmer in getKmers(text, k):
        if _forms_clump(findpos(text, kmer), k, L, t):
            kmerClumps.append(kmer)

    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(kmerClumps)
    output = ' '.join(kmerClumps)
    fout = 'ANS_pattern_clumps.txt'
    with open(fout, 'w') as fh:
        fh.write(output)
    webbrowser.open(fout)
# Run only when executed as a script, so that importing this module for its
# functions does not read sys.argv or write the answer file.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit("usage: %s <input_file>" % sys.argv[0])
    main(sys.argv[1])