-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathminimizer.py
79 lines (63 loc) · 2.16 KB
/
minimizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from collections import deque
from typing import List
import random
import time
mapping = {"A": 0,
"C": 1,
"G": 2,
"T": 3}
def order_kmer(seq: str):
order = 0
for i, letter in enumerate(seq[::-1]):
if letter not in mapping:
raise ValueError("Letter not in ACGT")
order += 4**i * mapping[letter]
return order
def find_minimizer_in_window(seq_window: str, k: int):
min_order = order_kmer(seq_window[:k])
min_pos = 0
for i in range(len(seq_window) - k + 1):
order = order_kmer(seq_window[i: i + k])
if order < min_order:
min_order = order
min_pos = i
return min_pos
def find_minimizers(seq: str, w: int, k: int) -> List[int]:
minimizers = []
for window in range(len(seq) - (w + k - 1) + 1):
minimizers.append(find_minimizer_in_window(seq[window: window + (w + k - 1)], k) + window)
return minimizers
def minimizerSlidingWindow(seq: str, w: int, k: int) -> List[int]:
deq = deque()
result = []
left = 0
for right in range(len(seq) - k + 1):
# pop larger elements
while deq and order_kmer(seq[right: right + k]) < \
order_kmer(seq[deq[-1]: deq[-1] + k]):
deq.pop()
# add new element to queue
deq.append(right)
# pop expired values
if left > deq[0]:
deq.popleft()
# start appending the result
if right >= w - 1:
result.append(deq[0])
left += 1
return result
if __name__ == "__main__":
ns = [100, 1000, 5000, 10_000, 50_000, 100_000] # Different values of n to test
w = 40
k = 15
for n in ns:
seq = "".join([random.choice("ATGC") for _ in range(n)])
start_time = time.time()
find_minimizers(seq, w, k)
end_time = time.time()
print(f"find_minimizers with n={n}: {end_time - start_time:.6f} seconds")
start_time = time.time()
minimizerSlidingWindow(seq, w, k)
end_time = time.time()
print(f"minimizerSlidingWindow with n={n}: {end_time - start_time:.6f} seconds")
print()